Skip to content

Commit 76af177

Browse files
committed
changed packager to work with normalised metadata and fixed tests
1 parent dbf94d6 commit 76af177

6 files changed

Lines changed: 80 additions & 27 deletions

File tree

ted_sws/notice_packager/services/metadata_transformer.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313

1414
import datetime
1515

16+
from ted_sws.core.model.metadata import NormalisedMetadata
1617
from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata
1718
from ted_sws.notice_packager.model.metadata import PackagerMetadata, METS_TYPE_CREATE, LANGUAGE, REVISION, BASE_WORK, \
1819
BASE_TITLE, METS_DMD_HREF, METS_DMD_ID, METS_TMD_ID, METS_TMD_HREF, METS_FILE_ID, METS_NOTICE_FILE_HREF
@@ -28,7 +29,7 @@
2829

2930

3031
class MetadataTransformer:
31-
def __init__(self, notice_metadata: ExtractedMetadata):
32+
def __init__(self, notice_metadata: NormalisedMetadata):
3233
self.notice_metadata = notice_metadata
3334

3435
def template_metadata(self, action: str = METS_TYPE_CREATE) -> PackagerMetadata:
@@ -50,32 +51,32 @@ def normalize_value(cls, value: str) -> str:
5051
return value.replace(DENORMALIZED_SEPARATOR, NORMALIZED_SEPARATOR)
5152

5253
@classmethod
53-
def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMetadata:
54+
def from_notice_metadata(cls, notice_metadata: NormalisedMetadata) -> PackagerMetadata:
5455
_date = datetime.datetime.now()
5556
_revision = REVISION
5657

5758
metadata = PackagerMetadata()
58-
5959
# NOTICE
6060
metadata.notice.id = cls.normalize_value(notice_metadata.notice_publication_number)
6161
metadata.notice.public_number_document = publication_notice_number(metadata.notice.id)
6262
metadata.notice.public_number_edition = publication_notice_year(
6363
notice_metadata) + filled_ojs_issue_number(notice_metadata.ojs_issue_number)
6464

6565
# WORK
66-
publication_date = datetime.datetime.strptime(notice_metadata.publication_date, '%Y%m%d').strftime('%Y-%m-%d')
66+
publication_date = datetime.datetime.fromisoformat(notice_metadata.publication_date).strftime('%Y-%m-%d')
6767
metadata.work.identifier = publication_work_identifier(metadata.notice.id, notice_metadata)
6868
metadata.work.oj_identifier = publication_work_oj_identifier(metadata.notice.id, notice_metadata)
6969
metadata.work.cdm_rdf_type = PROCUREMENT_PUBLIC
7070
metadata.work.resource_type = PROCUREMENT_NOTICE
7171
metadata.work.date_document = publication_date
7272
metadata.work.uri = publication_notice_uri(metadata.notice.id, notice_metadata)
73-
title_search = [t.title.text for t in notice_metadata.title if t.title.language == LANGUAGE.upper()]
73+
# TODO: If no title is found in English, get a random one
74+
title_search = [title.text for title in notice_metadata.title if title.language == LANGUAGE.upper()]
7475
if len(title_search) > 0:
7576
metadata.work.title = {LANGUAGE: title_search[0]}
7677
metadata.work.dataset_version = _date.strftime('%Y%m%d') + '-' + _revision
7778
metadata.work.procurement_public_issued_by_country = notice_metadata.country_of_buyer
78-
metadata.work.procurement_public_url_etendering = notice_metadata.uri_list
79+
# metadata.work.procurement_public_url_etendering = notice_metadata.uri_list
7980

8081
# EXPRESSION
8182
metadata.expression.identifier = f"{metadata.work.identifier}.MUL"
@@ -118,7 +119,7 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMet
118119

119120

120121
def publication_notice_year(notice_metadata):
121-
return datetime.datetime.strptime(notice_metadata.publication_date, '%Y%m%d').strftime('%Y')
122+
return str(datetime.datetime.fromisoformat(notice_metadata.publication_date).year)
122123

123124

124125
def publication_notice_number(notice_id):

tests/test_data/notices/eform-622690-2023.json

Lines changed: 50 additions & 0 deletions
Large diffs are not rendered by default.

tests/unit/notice_metadata_processor/test_eligibility.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
from ted_sws.core.model.notice import NoticeStatus
22
from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem
3-
from ted_sws.notice_metadata_processor.services.metadata_normalizer import MetadataNormaliser
3+
from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice
44
from ted_sws.notice_metadata_processor.services.notice_eligibility import check_package, \
55
notice_eligibility_checker, notice_eligibility_checker_by_id
66

77

88
def test_non_eligibility_by_notice(notice_eligibility_repository_path, indexed_notice):
99
mapping_suite_repository = MappingSuiteRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
10-
MetadataNormaliser(notice=indexed_notice).normalise_metadata()
10+
normalise_notice(notice=indexed_notice)
1111
notice_eligibility_checker(notice=indexed_notice, mapping_suite_repository=mapping_suite_repository)
1212
assert indexed_notice.status == NoticeStatus.INELIGIBLE_FOR_TRANSFORMATION
1313

1414

1515
def test_eligibility_by_notice(notice_eligibility_repository_path, notice_2020):
1616
mapping_suite_repository = MappingSuiteRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
17-
MetadataNormaliser(notice=notice_2020).normalise_metadata()
17+
normalise_notice(notice=notice_2020)
1818
notice_checker = notice_eligibility_checker(notice=notice_2020, mapping_suite_repository=mapping_suite_repository)
1919
notice_id, mapping_suite_identifier = notice_checker
2020
assert notice_id == "408313-2020"
@@ -23,7 +23,7 @@ def test_eligibility_by_notice(notice_eligibility_repository_path, notice_2020):
2323

2424

2525
def test_eligibility_by_notice_id(notice_eligibility_repository_path, notice_2020, notice_repository):
26-
MetadataNormaliser(notice=notice_2020).normalise_metadata()
26+
normalise_notice(notice=notice_2020)
2727
notice_repository.add(notice_2020)
2828
mapping_suite_repository = MappingSuiteRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
2929
notice_checker = notice_eligibility_checker_by_id(notice_id="408313-2020",
@@ -52,5 +52,4 @@ def test_check_mapping_suite(notice_eligibility_repository_path, normalised_meta
5252
normalised_metadata_object.eforms_subtype = "88"
5353
is_valid = check_package(mapping_suite=mapping_suite_repository.get("test_package"),
5454
notice_metadata=normalised_metadata_object)
55-
print(is_valid)
56-
assert not is_valid
55+
assert not is_valid

tests/unit/notice_metadata_processor/test_metadata_extractor.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
def test_metadata_extractor(indexed_notice):
1111
metadata_extractor = DefaultNoticeMetadataExtractor(
1212
xml_manifestation=indexed_notice.xml_manifestation).extract_metadata()
13-
extracted_metadata_dict = metadata_extractor.model_dump()
13+
extracted_metadata_dict = metadata_extractor.dict()
1414

1515
assert isinstance(metadata_extractor, ExtractedMetadata)
16-
assert extracted_metadata_dict.keys() == ExtractedMetadata.model_fields.keys()
16+
assert extracted_metadata_dict.keys() == ExtractedMetadata.__fields__.keys()
1717
assert "extracted_form_number", "xml_schema" in extracted_metadata_dict.keys()
1818
assert "067623-2022" in extracted_metadata_dict["notice_publication_number"]
1919
assert "http://publications.europa.eu/resource/schema/ted/R2.0.8/publication TED_EXPORT.xsd" in \
@@ -25,29 +25,29 @@ def test_metadata_extractor_2016(notice_2016):
2525
metadata_extractor = DefaultNoticeMetadataExtractor(
2626
xml_manifestation=notice_2016.xml_manifestation).extract_metadata()
2727

28-
extracted_metadata_dict = metadata_extractor.model_dump()
28+
extracted_metadata_dict = metadata_extractor.dict()
2929
assert isinstance(metadata_extractor, ExtractedMetadata)
30-
assert extracted_metadata_dict.keys() == ExtractedMetadata.model_fields.keys()
30+
assert extracted_metadata_dict.keys() == ExtractedMetadata.__fields__.keys()
3131
assert notice_2016.ted_id in extracted_metadata_dict["notice_publication_number"]
3232

3333

3434
def test_metadata_extractor_2015(notice_2015):
3535
metadata_extractor = DefaultNoticeMetadataExtractor(
3636
xml_manifestation=notice_2015.xml_manifestation).extract_metadata()
3737

38-
extracted_metadata_dict = metadata_extractor.model_dump()
38+
extracted_metadata_dict = metadata_extractor.dict()
3939
assert isinstance(metadata_extractor, ExtractedMetadata)
40-
assert extracted_metadata_dict.keys() == ExtractedMetadata.model_fields.keys()
40+
assert extracted_metadata_dict.keys() == ExtractedMetadata.__fields__.keys()
4141
assert notice_2015.ted_id in extracted_metadata_dict["notice_publication_number"]
4242

4343

4444
def test_metadata_extractor_2018(notice_2018):
4545
metadata_extractor = DefaultNoticeMetadataExtractor(
4646
xml_manifestation=notice_2018.xml_manifestation).extract_metadata()
4747

48-
extracted_metadata_dict = metadata_extractor.model_dump()
48+
extracted_metadata_dict = metadata_extractor.dict()
4949
assert isinstance(metadata_extractor, ExtractedMetadata)
50-
assert extracted_metadata_dict.keys() == ExtractedMetadata.model_fields.keys()
50+
assert extracted_metadata_dict.keys() == ExtractedMetadata.__fields__.keys()
5151
assert notice_2018.ted_id in extracted_metadata_dict["notice_publication_number"]
5252

5353

@@ -108,10 +108,10 @@ def test_get_normalised_namespaces(indexed_notice):
108108
def test_metadata_eform_extractor(eform_notice_622690):
109109
metadata_extractor = EformsNoticeMetadataExtractor(
110110
xml_manifestation=eform_notice_622690.xml_manifestation).extract_metadata()
111-
extracted_metadata_dict = metadata_extractor.model_dump()
111+
extracted_metadata_dict = metadata_extractor.dict()
112112
print(extracted_metadata_dict)
113113
assert isinstance(metadata_extractor, ExtractedMetadata)
114-
assert extracted_metadata_dict.keys() == ExtractedMetadata.model_fields.keys()
114+
assert extracted_metadata_dict.keys() == ExtractedMetadata.__fields__.keys()
115115
assert "extracted_form_number", "xml_schema" in extracted_metadata_dict.keys()
116116
assert "00622690-2023" in extracted_metadata_dict["notice_publication_number"]
117117
assert "competition" in extracted_metadata_dict["extracted_eform_type"]

tests/unit/notice_packager/test_metadata_transformer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,17 @@
66
# Email: kalean.bl@gmail.com
77

88
""" """
9-
9+
from ted_sws.core.model.metadata import NormalisedMetadata
1010
from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata
1111
from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer, publication_notice_uri, \
1212
publication_notice_year, publication_work_identifier, publication_notice_number, NORMALIZED_SEPARATOR
1313

1414

15-
def test_notice_metadata(notice_sample_metadata: ExtractedMetadata):
16-
assert isinstance(notice_sample_metadata, ExtractedMetadata)
15+
def test_notice_metadata(notice_sample_metadata: NormalisedMetadata):
16+
assert isinstance(notice_sample_metadata, NormalisedMetadata)
1717

1818

19-
def test_metadata_transformer(notice_sample_metadata: ExtractedMetadata):
19+
def test_metadata_transformer(notice_sample_metadata: NormalisedMetadata):
2020
metadata_transformer = MetadataTransformer(notice_sample_metadata)
2121
template_metadata = metadata_transformer.template_metadata()
2222

tests/unit/notice_packager/test_notice_packager.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,14 @@
99

1010
from ted_sws.core.model.manifestation import RDFManifestation
1111
from ted_sws.core.model.notice import NoticeStatus
12+
from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice
1213
from ted_sws.notice_packager.model.metadata import METS_TYPE_CREATE
1314
from ted_sws.notice_packager.services.notice_packager import package_notice, NoticePackager
1415

1516

1617
def test_notice_packager_with_notice(notice_2018, rdf_content):
18+
notice_2018._status = NoticeStatus.INDEXED
19+
normalise_notice(notice=notice_2018)
1720
rdf_manifestation = RDFManifestation(object_data=rdf_content)
1821
notice_2018._status = NoticeStatus.ELIGIBLE_FOR_PACKAGING
1922
notice_2018._rdf_manifestation = rdf_manifestation

0 commit comments

Comments
 (0)