Skip to content

Commit 024f86a

Browse files
Kolea PLESCO authored and schivmeister committed
fix: SF normalization with MS Config, MSSDK CSV-Pandas compatibility
- pass MongoDB client to normalise_notice function
- reparse MSSDK CSV list object w/ Pandas to reinterpret numbers
- update tests
1 parent f26fc1f commit 024f86a

11 files changed

Lines changed: 96 additions & 45 deletions

File tree

src/ted_sws/notice_metadata_processor/services/metadata_normalizer.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ def normalise_notice(notice: Notice, mongodb_client: MongoClient = None) -> Noti
9797
return notice
9898

9999

100-
def normalise_notice_by_id(notice_id: str, notice_repository: NoticeRepositoryABC) -> Notice:
100+
def normalise_notice_by_id(notice_id: str, notice_repository: NoticeRepositoryABC, mongodb_client: MongoClient = None) -> Notice:
101101
"""
102102
Given a notice id, find the notice in the database, normalise its metadata, and store the updated state.
103103
:param notice_id:
@@ -108,7 +108,7 @@ def normalise_notice_by_id(notice_id: str, notice_repository: NoticeRepositoryAB
108108
if notice is None:
109109
raise ValueError('Notice, with "%s" notice_id, was not found' % notice_id)
110110

111-
return normalise_notice(notice)
111+
return normalise_notice(notice, mongodb_client=mongodb_client)
112112

113113

114114
def create_normalised_metadata_view(normalised_metadata: NormalisedMetadata) -> Optional[NormalisedMetadataView]:

src/ted_sws/resources/mapping_files_registry.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def get_suite_resource_content(self, filename: str):
5555
resource_content = resource['object'] if resource else None
5656

5757
if self.extract_filename_ext(filename) == CSV_EXT:
58-
return pd.DataFrame(resource_content).fillna("")
58+
return pd.DataFrame(resource_content).apply(pd.to_numeric, errors="ignore").fillna("")
5959

6060
return resource_content
6161

test/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,7 @@ def notice_2020():
156156
notice.set_xml_metadata(XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"]))
157157
notice.set_xml_manifestation(xml_manifestation)
158158
notice.set_original_metadata(original_metadata)
159+
notice.mapping_package_identifier = "test_package_eforms_sdk1.8"
159160
return notice
160161

161162

test/e2e/conftest.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
from pymongo import MongoClient
55

66
from src.ted_sws import config
7+
from src.ted_sws.data_manager.adapters.mapping_package_repository import MappingPackageRepositoryMongoDB
8+
from src.ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryMongoDB
79
from src.ted_sws.data_manager.adapters.notice_repository import NoticeRepository
810
from src.ted_sws.data_manager.adapters.triple_store import AllegroGraphTripleStore, FusekiAdapter
911

@@ -78,3 +80,11 @@ def path_to_file_system_repository():
7880
@pytest.fixture
7981
def fake_notice_repository(fake_mongodb_client):
8082
return NoticeRepository(mongodb_client=fake_mongodb_client)
83+
84+
85+
@pytest.fixture
86+
def load_mapping_suite_and_package(mongodb_client, mapping_suite, mapping_package):
87+
mapping_suite_repository = MappingSuiteRepositoryMongoDB(mongodb_client=mongodb_client)
88+
mapping_suite_repository.add(mapping_suite=mapping_suite)
89+
mapping_package_repository = MappingPackageRepositoryMongoDB(mongodb_client=mongodb_client)
90+
mapping_package_repository.add(mapping_package=mapping_package)

test/e2e/data_sampler/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ def mongodb_client():
2121

2222

2323
@pytest.fixture
24-
def notice_repository_with_indexed_notices(mongodb_client) -> NoticeRepository:
24+
def notice_repository_with_indexed_notices(mongodb_client, load_mapping_suite_and_package) -> NoticeRepository:
2525

2626
load_mapping_suite_and_packages_from_github_to_mongo_db(
2727
mapping_package_name="package_F03_test",
@@ -31,6 +31,6 @@ def notice_repository_with_indexed_notices(mongodb_client) -> NoticeRepository:
3131
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
3232
for notice in notice_repository.list():
3333
indexed_notice = index_notice(notice=notice)
34-
normalised_notice = normalise_notice(notice=indexed_notice)
34+
normalised_notice = normalise_notice(notice=indexed_notice, mongodb_client=mongodb_client)
3535
notice_repository.update(notice=normalised_notice)
3636
return notice_repository

test/unit/notice_metadata_processor/test_eligibility.py

Lines changed: 29 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,37 +6,48 @@
66
from src.ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice
77
from src.ted_sws.notice_metadata_processor.services.notice_eligibility import check_package, \
88
notice_eligibility_checker, notice_eligibility_checker_by_id, format_version_with_zero_patch, is_date_in_range
9+
from test.unit.notice_metadata_processor import load_mapping_suite_and_package
910

1011

11-
def test_non_eligibility_by_notice(notice_eligibility_repository_path, indexed_notice):
12-
mapping_package_repository = MappingPackageRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
13-
normalise_notice(notice=indexed_notice)
12+
def test_non_eligibility_by_notice(notice_eligibility_repository_path, indexed_notice,
13+
mongodb_client, load_mapping_suite_and_package):
14+
mapping_package_repository = MappingPackageRepositoryInFileSystem(
15+
repository_path=notice_eligibility_repository_path)
16+
normalise_notice(notice=indexed_notice, mongodb_client=mongodb_client)
1417
notice_eligibility_checker(notice=indexed_notice, mapping_package_repository=mapping_package_repository)
1518
assert indexed_notice.status == NoticeStatus.INELIGIBLE_FOR_TRANSFORMATION
1619

1720

18-
def test_eforms_eligibility_by_notice(notice_eligibility_repository_path, indexed_eform_notice_622690):
19-
mapping_package_repository = MappingPackageRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
20-
normalise_notice(notice=indexed_eform_notice_622690)
21-
notice_eligibility_checker(notice=indexed_eform_notice_622690, mapping_package_repository=mapping_package_repository)
21+
def test_eforms_eligibility_by_notice(notice_eligibility_repository_path, indexed_eform_notice_622690,
22+
mongodb_client, load_mapping_suite_and_package):
23+
mapping_package_repository = MappingPackageRepositoryInFileSystem(
24+
repository_path=notice_eligibility_repository_path)
25+
normalise_notice(notice=indexed_eform_notice_622690, mongodb_client=mongodb_client)
26+
notice_eligibility_checker(notice=indexed_eform_notice_622690,
27+
mapping_package_repository=mapping_package_repository)
2228
assert indexed_eform_notice_622690.status == NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION
2329

2430

25-
def test_eligibility_by_notice(notice_eligibility_repository_path, notice_2020):
26-
mapping_package_repository = MappingPackageRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
27-
normalise_notice(notice=notice_2020)
28-
notice_checker = notice_eligibility_checker(notice=notice_2020, mapping_package_repository=mapping_package_repository)
31+
def test_eligibility_by_notice(notice_eligibility_repository_path, notice_2020,
32+
mongodb_client, load_mapping_suite_and_package):
33+
mapping_package_repository = MappingPackageRepositoryInFileSystem(
34+
repository_path=notice_eligibility_repository_path)
35+
normalise_notice(notice=notice_2020, mongodb_client=mongodb_client)
36+
notice_checker = notice_eligibility_checker(notice=notice_2020,
37+
mapping_package_repository=mapping_package_repository)
2938
notice_id, mapping_package_identifier = notice_checker
3039
assert notice_id == "408313-2020"
3140
assert mapping_package_identifier == "test_package2"
3241
assert notice_2020.status == NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION
3342

3443

35-
def test_eligibility_by_notice_id(notice_eligibility_repository_path, notice_2020, notice_repository):
36-
normalise_notice(notice=notice_2020)
44+
def test_eligibility_by_notice_id(notice_eligibility_repository_path, notice_2020, notice_repository,
45+
mongodb_client, load_mapping_suite_and_package):
46+
normalise_notice(notice=notice_2020, mongodb_client=mongodb_client)
3747
notice_repository.add(notice_2020)
38-
mapping_package_repository = MappingPackageRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
39-
notice_checker = notice_eligibility_checker_by_id(notice_id="408313-2020",
48+
mapping_package_repository = MappingPackageRepositoryInFileSystem(
49+
repository_path=notice_eligibility_repository_path)
50+
notice_checker = notice_eligibility_checker_by_id(notice_id="408313-2020",
4051
mapping_package_repository=mapping_package_repository,
4152
notice_repository=notice_repository)
4253
notice_id, mapping_package_identifier = notice_checker
@@ -47,8 +58,9 @@ def test_eligibility_by_notice_id(notice_eligibility_repository_path, notice_202
4758

4859

4960
def test_check_mapping_package(notice_eligibility_repository_path, normalised_metadata_object,
50-
eform_normalised_metadata_object):
51-
mapping_package_repository = MappingPackageRepositoryInFileSystem(repository_path=notice_eligibility_repository_path)
61+
eform_normalised_metadata_object):
62+
mapping_package_repository = MappingPackageRepositoryInFileSystem(
63+
repository_path=notice_eligibility_repository_path)
5264
is_valid = check_package(mapping_package=mapping_package_repository.get("test_package"),
5365
notice_metadata=normalised_metadata_object)
5466

test/unit/notice_metadata_processor/test_mapping_files_registry.py

Lines changed: 24 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,36 @@
11
import pandas as pd
22

33
from src.ted_sws.resources.mapping_files_registry import MappingFilesRegistry
4+
from test.unit.notice_metadata_processor import load_mapping_suite_and_package
45

56

6-
def test_mapping_file_registry():
7-
json_resource_files = [MappingFilesRegistry().countries, MappingFilesRegistry().notice_type,
8-
MappingFilesRegistry().languages,
9-
MappingFilesRegistry().legal_basis]
7+
def test_mapping_file_registry(indexed_notice, mongodb_client, load_mapping_suite_and_package):
8+
json_resource_files = [
9+
MappingFilesRegistry(
10+
notice=indexed_notice, mongodb_client=mongodb_client
11+
).countries,
12+
MappingFilesRegistry(
13+
notice=indexed_notice, mongodb_client=mongodb_client
14+
).notice_type,
15+
MappingFilesRegistry(
16+
notice=indexed_notice, mongodb_client=mongodb_client
17+
).languages,
18+
MappingFilesRegistry(
19+
notice=indexed_notice, mongodb_client=mongodb_client
20+
).legal_basis
21+
]
1022
for file_content in json_resource_files:
1123
assert isinstance(file_content, dict)
1224
assert "results" in file_content.keys()
1325

14-
csv_resource_files = [MappingFilesRegistry().sf_notice_df, MappingFilesRegistry().ef_notice_df]
26+
csv_resource_files = [
27+
MappingFilesRegistry(
28+
notice=indexed_notice, mongodb_client=mongodb_client
29+
).sf_notice_df,
30+
MappingFilesRegistry(
31+
notice=indexed_notice, mongodb_client=mongodb_client
32+
).ef_notice_df
33+
]
1534

1635
for file_content in csv_resource_files:
1736
assert isinstance(file_content, pd.DataFrame)

test/unit/notice_metadata_processor/test_metadata_normaliser.py

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,11 @@
22
from xml.etree.ElementTree import ParseError
33

44
import pytest
5+
56
from src.ted_sws.core.model.manifestation import XMLManifestation
67
from src.ted_sws.core.model.metadata import NormalisedMetadata, LanguageTaggedString
78
from src.ted_sws.core.model.notice import NoticeStatus, Notice
8-
from src.ted_sws.data_manager.adapters.mapping_package_repository import MappingPackageRepositoryMongoDB
9-
from src.ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryMongoDB
9+
from src.ted_sws.data_manager.adapters.notice_repository import NoticeRepository
1010
from src.ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import \
1111
DefaultNoticeMetadataExtractor, EformsNoticeMetadataExtractor
1212
from src.ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser import \
@@ -26,17 +26,19 @@ def html_str(content: str) -> str:
2626
return f"""<?xml version="1.0" encoding="UTF-8"?> <body>{content}</body>"""
2727

2828

29-
def test_metadata_normaliser_by_notice(indexed_notice):
30-
notice = normalise_notice(indexed_notice)
29+
def test_metadata_normaliser_by_notice(indexed_notice, mongodb_client, load_mapping_suite_and_package):
30+
notice = normalise_notice(indexed_notice, mongodb_client=mongodb_client)
3131
assert notice.normalised_metadata
3232
assert notice.normalised_metadata.title
3333
assert isinstance(notice.normalised_metadata.eforms_subtype, str)
3434
assert notice.status == NoticeStatus.NORMALISED_METADATA
3535

3636

37-
def test_metadata_normaliser_by_notice_id(notice_id, notice_repository, notice_2020):
37+
def test_metadata_normaliser_by_notice_id(notice_id, notice_2020, mongodb_client, load_mapping_suite_and_package):
38+
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
3839
notice_repository.add(notice_2020)
39-
notice = normalise_notice_by_id(notice_id=notice_2020.ted_id, notice_repository=notice_repository)
40+
notice = normalise_notice_by_id(notice_id=notice_2020.ted_id, notice_repository=notice_repository,
41+
mongodb_client=mongodb_client)
4042
assert notice.normalised_metadata
4143
assert notice.normalised_metadata.title
4244
assert notice.status == NoticeStatus.NORMALISED_METADATA
@@ -48,9 +50,9 @@ def test_metadata_normaliser_by_wrong_notice_id(notice_repository):
4850
normalise_notice_by_id(notice_id=notice_id, notice_repository=notice_repository)
4951

5052

51-
def test_metadata_normaliser(indexed_notice):
53+
def test_metadata_normaliser(indexed_notice, mongodb_client, load_mapping_suite_and_package):
5254
notice = indexed_notice
53-
normalise_notice(notice=notice)
55+
normalise_notice(notice=notice, mongodb_client=mongodb_client)
5456

5557
assert notice.normalised_metadata
5658
assert notice.normalised_metadata.title
@@ -213,8 +215,9 @@ def test_find_metadata_extractor_based_on_xml_manifestation(eform_notice_622690,
213215
DefaultNoticeMetadataExtractor)
214216

215217

216-
def test_find_metadata_normaliser_based_on_xml_manifestation(eform_notice_622690, notice_2018, mongodb_client,
217-
load_mapping_suite_and_package):
218+
def test_find_metadata_normaliser_based_on_xml_manifestation(
219+
eform_notice_622690, notice_2018, mongodb_client, load_mapping_suite_and_package
220+
):
218221
assert isinstance(
219222
find_metadata_normaliser_based_on_xml_manifestation(
220223
notice=eform_notice_622690,

test/unit/notice_packager/conftest.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,11 @@
1616
DefaultNoticeMetadataExtractor
1717
from src.ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser import \
1818
DefaultNoticeMetadataNormaliser
19-
from src.ted_sws.notice_packager.model.metadata import PackagerMetadata, NoticeMetadata, WorkMetadata, ExpressionMetadata, \
19+
from src.ted_sws.notice_packager.model.metadata import PackagerMetadata, NoticeMetadata, WorkMetadata, \
20+
ExpressionMetadata, \
2021
ManifestationMetadata
2122
from test import TEST_DATA_PATH
23+
from test.unit.notice_metadata_processor import load_mapping_suite_and_package
2224

2325

2426
# template_metadata START
@@ -75,10 +77,14 @@ def template_sample_manifestation(template_sample_metadata) -> ManifestationMeta
7577
# notice_metadata START
7678

7779
@pytest.fixture
78-
def notice_sample_metadata(notice_2018) -> NormalisedMetadata:
79-
normalised_metadata = DefaultNoticeMetadataNormaliser().normalise_metadata(
80+
def notice_sample_metadata(notice_2018, mongodb_client, load_mapping_suite_and_package) -> NormalisedMetadata:
81+
normalised_metadata = DefaultNoticeMetadataNormaliser(
82+
notice=notice_2018, mongodb_client=mongodb_client
83+
).normalise_metadata(
8084
extracted_metadata=DefaultNoticeMetadataExtractor(
81-
xml_manifestation=notice_2018.xml_manifestation).extract_metadata())
85+
xml_manifestation=notice_2018.xml_manifestation
86+
).extract_metadata()
87+
)
8288

8389
return normalised_metadata
8490

@@ -119,4 +125,4 @@ def notice_id():
119125
@pytest.fixture
120126
def work_id_predicate():
121127
"""Returns the URI predicate for the CDM work identifier."""
122-
return "http://publications.europa.eu/ontology/cdm#work_id"
128+
return "http://publications.europa.eu/ontology/cdm#work_id"

test/unit/notice_packager/test_metadata_transformer.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,4 +47,3 @@ def test_publication_notice_uri(notice_id, notice_sample_metadata):
4747
def test_publication_work_identifier(notice_id, notice_sample_metadata):
4848
work_id = publication_work_identifier(notice_id, notice_sample_metadata)
4949
assert work_id == "2018_S_22_196390"
50-

0 commit comments

Comments
 (0)