Skip to content

Commit 10ec84b

Browse files
authored
Merge pull request #519 from OP-TED/feature/TED4-106
fixes for extractor
2 parents 8c551e6 + cc4f116 commit 10ec84b

3 files changed

Lines changed: 36 additions & 16 deletions

File tree

ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py

Lines changed: 28 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -276,16 +276,19 @@ def __init__(self, xml_manifestation: XMLManifestation):
276276

277277
@property
278278
def title(self):
279-
title_country = LanguageTaggedString(text=extract_text_from_element(
280-
element=self.manifestation_root.find(self.xpath_registry.xpath_title_country, namespaces=self.namespaces)),language='')
281-
title_text = LanguageTaggedString(
282-
text=extract_text_from_element(element=self.manifestation_root.find(
283-
self.xpath_registry.xpath_title,
284-
namespaces=self.namespaces)),
285-
language=extract_attribute_from_element(element=self.manifestation_root.find(
286-
self.xpath_registry.xpath_title,
287-
namespaces=self.namespaces), attrib_key="languageID"))
288-
return [CompositeTitle(title=title_text, title_country=title_country)]
279+
title_translations = []
280+
title_elements = self.manifestation_root.findall(
281+
self.xpath_registry.xpath_title,
282+
namespaces=self.namespaces)
283+
for title in title_elements:
284+
language = title.find(".").attrib["languageID"]
285+
title_country = LanguageTaggedString(text=language, language=language)
286+
title_text = LanguageTaggedString(
287+
text=extract_text_from_element(element=title),
288+
language=language)
289+
title_translations.append(
290+
CompositeTitle(title=title_text, title_country=title_country))
291+
return title_translations
289292

290293
@property
291294
def publication_date(self):
@@ -324,9 +327,21 @@ def type_of_procedure(self):
324327

325328
@property
326329
def place_of_performance(self):
327-
extracted_nuts_code = extract_text_from_element(
328-
element=self.manifestation_root.find(self.xpath_registry.xpath_place_of_performance, namespaces=self.namespaces))
329-
return [EncodedValue(value=extracted_nuts_code,code=extracted_nuts_code)]
330+
extracted_project_nuts_code = extract_text_from_element(
331+
element=self.manifestation_root.find(self.xpath_registry.xpath_place_of_performance,
332+
namespaces=self.namespaces))
333+
place_of_performance_organisation_elements = self.manifestation_root.findall(
334+
self.xpath_registry.xpath_place_of_performance_elements, namespaces=self.namespaces)
335+
nuts_code_from_organisations = [EncodedValue(code=extract_text_from_element(element=element),
336+
value=extract_text_from_element(element=element)) for element in
337+
place_of_performance_organisation_elements]
338+
339+
if extracted_project_nuts_code:
340+
extracted_project_nuts_encoded = EncodedValue(value=extracted_project_nuts_code,
341+
code=extracted_project_nuts_code)
342+
nuts_code_from_organisations.append(extracted_project_nuts_encoded)
343+
344+
return nuts_code_from_organisations
330345

331346
@property
332347
def common_procurement(self):

ted_sws/notice_metadata_processor/adapters/xpath_registry.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,7 @@ class EformsXPathRegistry(XPathRegistryABC):
148148

149149
@property
150150
def xpath_title(self):
151-
return ".//cac:ProcurementProject/cbc:Name"
151+
return "./cac:ProcurementProject/cbc:Name"
152152

153153
@property
154154
def xpath_title_country(self):
@@ -176,7 +176,7 @@ def xpath_document_sent_date(self):
176176

177177
@property
178178
def xpath_type_of_contract(self):
179-
return ".//cac:ProcurementProject/cbc:ProcurementTypeCode[@listName='contract-nature']"
179+
return "./cac:ProcurementProject/cbc:ProcurementTypeCode[@listName='contract-nature']"
180180

181181
@property
182182
def xpath_type_of_procedure(self):
@@ -186,6 +186,10 @@ def xpath_type_of_procedure(self):
186186
def xpath_place_of_performance(self):
187187
return ".//cac:ProcurementProject/cac:RealizedLocation/cac:Address/cbc:CountrySubentityCode[@listName='nuts']"
188188

189+
@property
190+
def xpath_place_of_performance_elements(self):
191+
return ".//efac:Organizations/efac:Organization/efac:Company/cac:PostalAddress/cbc:CountrySubentityCode[@listName='nuts']"
192+
189193
@property
190194
def xpath_common_procurement_elements(self):
191195
return ".//cac:ProcurementProject/*/cbc:ItemClassificationCode[@listName='cpv']"

tests/unit/notice_metadata_processor/test_metadata_extractor.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import xml.etree.ElementTree as ET
2+
from importlib.resources import path
23

34
from ted_sws.core.model.manifestation import XMLManifestation
45
from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import EformsNoticeMetadataExtractor, \
@@ -117,7 +118,7 @@ def test_metadata_eform_extractor(eform_notice_622690):
117118
assert extracted_metadata_dict["extracted_form_number"] == None
118119

119120

120-
def _test_metadata_extractor_for_all_eforms_variations(eforms_xml_notice_paths):
121+
def test_metadata_extractor_for_all_eforms_variations(eforms_xml_notice_paths):
121122
for xml_notice_path in eforms_xml_notice_paths:
122123
notice_id = xml_notice_path.name
123124
eforms_subtype = xml_notice_path.parent.name

0 commit comments

Comments
 (0)