Skip to content

Commit d08b0c6

Browse files
committed
fix: circular dependency - decouple MappingFilesRegistry from notice and package
There was a hidden circular dependency in the metadata resource migration to MS Config via MSSDK. The previous design required a notice with `mapping_package_identifier` set in order to load resources, which created a cycle: normalisation needs resources, yet eligibility checking (which returns a package identifier but does not set one on the notice) needs normalised metadata.

Initial assumptions may have been anchored on the resources being project-specific. However, this is problematic because not all projects may have been updated with the mapping suite configuration. Therefore, resource files (country.json, languages.json, etc.) are treated as global for now, during the transition period. Once all currently known production projects are updated with the configuration, a more dynamic method of selecting the mapping suite can be implemented, e.g. via the `document_probing` conditions specified in the config, which define which XPaths must and must not be available to be compatible with the project.

Changes:
- MappingFilesRegistry now loads resources from any available MappingSuite
- Removed notice parameter from DefaultNoticeMetadataNormaliser and EformsNoticeMetadataNormaliser constructors
- Updated find_metadata_normaliser_based_on_xml_manifestation() and extract_and_normalise_notice_metadata() to not require notice
- Added MappingSuiteConfigError for when no MappingSuite is available
- Updated all test fixtures to use the new API
- Removed all traces of, and dependence on, a Notice `mapping_package_identifier`

TODO: The mapping suite must be made mandatory and, if not given, be fetched from a default known project that has the configuration.
1 parent 6b6373f commit d08b0c6

13 files changed

Lines changed: 65 additions & 94 deletions

File tree

src/ted_sws/core/model/notice.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,6 @@ class Notice(LazyWorkExpression):
192192
_mets_manifestation: Optional[METSManifestation] = None
193193
_xml_metadata: Optional[XMLMetadata] = None
194194
validation_summary: Optional[ValidationSummaryReport] = None
195-
mapping_package_identifier: Optional[str] = None
196195

197196
@computed_field
198197
@property

src/ted_sws/mapping_suite_processor/services/mapping_package_processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def mapping_package_processor_load_package_in_mongo_db(
7474
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
7575
for test_data in tests_data:
7676
notice_id = test_data.file_name.split(".")[0]
77-
notice = Notice(ted_id=notice_id, mapping_package_identifier=package.identifier)
77+
notice = Notice(ted_id=notice_id)
7878
notice.set_xml_manifestation(XMLManifestation(object_data=test_data.file_content))
7979
notice_repository.add(notice=notice)
8080
result_notice_ids.append(notice_id)

src/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from pymongo import MongoClient
99

1010
from src.ted_sws.core.model.metadata import NormalisedMetadata, LanguageTaggedString, NoticeSource
11-
from src.ted_sws.core.model.notice import Notice
1211
from src.ted_sws.event_manager.services.log import log_notice_info
1312
from src.ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata
1413
from src.ted_sws.notice_metadata_processor.services.metadata_constraints import filter_df_by_variables
@@ -89,8 +88,8 @@ def normalise_metadata(self, extracted_metadata: ExtractedMetadata) -> Normalise
8988

9089

9190
class DefaultNoticeMetadataNormaliser(NoticeMetadataNormaliserABC):
92-
def __init__(self, notice: Notice, mongodb_client: MongoClient = None):
93-
self.mapping_registry = MappingFilesRegistry(notice=notice, mongodb_client=mongodb_client)
91+
def __init__(self, mongodb_client: MongoClient = None):
92+
self.mapping_registry = MappingFilesRegistry(mongodb_client=mongodb_client)
9493

9594
@classmethod
9695
def normalise_legal_basis_value(cls, value: str) -> str:
@@ -278,8 +277,8 @@ class EformsNoticeMetadataNormaliser(NoticeMetadataNormaliserABC):
278277
"""
279278
Metadata normaliser for eForms
280279
"""
281-
def __init__(self, notice: Notice, mongodb_client: MongoClient = None):
282-
self.mapping_registry = MappingFilesRegistry(notice=notice, mongodb_client=mongodb_client)
280+
def __init__(self, mongodb_client: MongoClient = None):
281+
self.mapping_registry = MappingFilesRegistry(mongodb_client=mongodb_client)
283282

284283
@classmethod
285284
def iso_date_format(cls, _date: str, with_none=False):

src/ted_sws/notice_metadata_processor/services/metadata_normalizer.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,16 @@ def find_metadata_extractor_based_on_xml_manifestation(
3434

3535

3636
def find_metadata_normaliser_based_on_xml_manifestation(
37-
notice: Notice,
3837
xml_manifestation: XMLManifestation,
3938
mongodb_client: MongoClient = None
4039
) -> NoticeMetadataNormaliserABC:
4140
"""
4241
Find the correct extractor based on the XML Manifestation
4342
"""
4443
if check_if_xml_manifestation_is_eform(xml_manifestation):
45-
return EformsNoticeMetadataNormaliser(notice=notice, mongodb_client=mongodb_client)
44+
return EformsNoticeMetadataNormaliser(mongodb_client=mongodb_client)
4645
else:
47-
return DefaultNoticeMetadataNormaliser(notice=notice, mongodb_client=mongodb_client)
46+
return DefaultNoticeMetadataNormaliser(mongodb_client=mongodb_client)
4847

4948

5049
def extract_notice_metadata(metadata_extractor: NoticeMetadataExtractorABC) -> ExtractedMetadata:
@@ -62,14 +61,13 @@ def normalise_notice_metadata(extracted_metadata: ExtractedMetadata,
6261
return metadata_normaliser.normalise_metadata(extracted_metadata)
6362

6463

65-
def extract_and_normalise_notice_metadata(notice: Notice, xml_manifestation: XMLManifestation, mongodb_client: MongoClient = None) -> NormalisedMetadata:
64+
def extract_and_normalise_notice_metadata(xml_manifestation: XMLManifestation, mongodb_client: MongoClient = None) -> NormalisedMetadata:
6665
"""
6766
Extract and normalise metadata using the correct extractor and normaliser type
6867
"""
6968
metadata_extractor = find_metadata_extractor_based_on_xml_manifestation(xml_manifestation)
7069
extracted_metadata = extract_notice_metadata(metadata_extractor)
7170
metadata_normaliser = find_metadata_normaliser_based_on_xml_manifestation(
72-
notice=notice,
7371
xml_manifestation=xml_manifestation,
7472
mongodb_client=mongodb_client
7573
)
@@ -82,7 +80,7 @@ def extract_and_normalise_notice_metadata_from_notice(notice: Notice, mongodb_cl
8280
Extract and normalise metadata using the correct extractor and normaliser type
8381
"""
8482
xml_manifestation = notice.xml_manifestation
85-
return extract_and_normalise_notice_metadata(notice=notice, xml_manifestation=xml_manifestation, mongodb_client=mongodb_client)
83+
return extract_and_normalise_notice_metadata(xml_manifestation=xml_manifestation, mongodb_client=mongodb_client)
8684

8785

8886
def normalise_notice(notice: Notice, mongodb_client: MongoClient = None) -> Notice:

src/ted_sws/resources/mapping_files_registry.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
from pymongo import MongoClient
55

66
from src.ted_sws import config
7-
from src.ted_sws.core.model.notice import Notice
8-
from src.ted_sws.data_manager.adapters.mapping_package_repository import MappingPackageRepositoryMongoDB
97
from src.ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryMongoDB
108

119
COUNTRIES_MAPPING_FILE = "country.json"
@@ -21,18 +19,31 @@
2119
CSV_EXT = ".csv"
2220

2321

22+
class MappingSuiteConfigError(Exception):
23+
"""Raised when no MappingSuite is found in the database."""
24+
pass
25+
26+
2427
class MappingFilesRegistry:
2528
"""
26-
Registry of mapping files. This will return the specific file content
29+
Registry of mapping files. This will return the specific file content.
30+
31+
Resource files (country.json, languages.json, etc.) are global and identical
32+
across all mapping packages, so we can load them from any available MappingSuite.
2733
"""
2834

29-
def __init__(self, notice: Notice, mongodb_client: MongoClient = None):
35+
def __init__(self, mongodb_client: MongoClient = None):
3036
if not mongodb_client:
3137
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
32-
mapping_package_repository = MappingPackageRepositoryMongoDB(mongodb_client=mongodb_client)
33-
mapping_package = mapping_package_repository.get(notice.mapping_package_identifier)
3438
mapping_suite_repository = MappingSuiteRepositoryMongoDB(mongodb_client=mongodb_client)
35-
self.mapping_suite = mapping_suite_repository.get(mapping_package.mapping_suite_identifier)
39+
# Get any available MappingSuite - resources are global/identical across all suites
40+
all_suites = mapping_suite_repository.list()
41+
if not all_suites:
42+
raise MappingSuiteConfigError(
43+
"No MappingSuite found in the database. Please ensure at least one "
44+
"mapping suite is loaded before attempting to normalise notices."
45+
)
46+
self.mapping_suite = all_suites[0]
3647

3748
@staticmethod
3849
def extract_filename_from_path(path: str) -> str:

test/conftest.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ def raw_notice(ted_document_search, notice_repository, notice_id) -> Notice:
8686
@pytest.fixture
8787
def indexed_notice(raw_notice) -> Notice:
8888
raw_notice.set_xml_metadata(XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"]))
89-
raw_notice.mapping_package_identifier = "test_package_eforms_sdk1.8"
9089
return raw_notice
9190

9291

@@ -139,7 +138,6 @@ def notice_2018():
139138
notice = Notice(ted_id=ted_id)
140139
notice.set_xml_manifestation(xml_manifestation)
141140
notice.set_original_metadata(original_metadata)
142-
notice.mapping_package_identifier = "test_package_eforms_sdk1.8"
143141
return notice
144142

145143

@@ -156,7 +154,6 @@ def notice_2020():
156154
notice.set_xml_metadata(XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"]))
157155
notice.set_xml_manifestation(xml_manifestation)
158156
notice.set_original_metadata(original_metadata)
159-
notice.mapping_package_identifier = "test_package_eforms_sdk1.8"
160157
return notice
161158

162159

@@ -300,7 +297,6 @@ def notice_2021():
300297
notice = Notice(ted_id=ted_id)
301298
notice.set_xml_manifestation(xml_manifestation)
302299
notice.set_original_metadata(original_metadata)
303-
notice.mapping_package_identifier = "test_package_eforms_sdk1.8"
304300
return notice
305301

306302

@@ -335,7 +331,6 @@ def eform_notice_622690():
335331
notice = Notice(ted_id=ted_id)
336332
notice.set_xml_manifestation(xml_manifestation)
337333
notice.set_original_metadata(original_metadata)
338-
notice.mapping_package_identifier = "test_package_eforms_sdk1.8"
339334
return notice
340335

341336

test/e2e/data_manager/test_mongodb_client.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def test_mongo_db_query_2():
139139
])
140140

141141

142-
def test_create_matview_for_notices(fake_mongodb_client, load_mapping_suite_and_package_fake, mapping_package):
142+
def test_create_matview_for_notices(fake_mongodb_client, load_mapping_suite_and_package_fake):
143143
notice_id = "696661-2022"
144144
ted_api_query = {"query": f"ND={notice_id}"}
145145
mongodb_client = fake_mongodb_client
@@ -152,7 +152,6 @@ def test_create_matview_for_notices(fake_mongodb_client, load_mapping_suite_and_
152152
notice_event.caller_name = "execute"
153153
notice_event.start_record()
154154
notice = notice_repository.get(reference=notice_id)
155-
notice.mapping_package_identifier = mapping_package.id
156155
indexed_notice = index_notice(notice=notice)
157156
normalised_notice = normalise_notice(notice=indexed_notice, mongodb_client=mongodb_client)
158157
notice = normalised_notice

test/features/conftest.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,47 +44,48 @@ def notice_repository(mongodb_client):
4444

4545
@pytest.fixture
4646
def load_mapping_suite_and_package(mongodb_client, mapping_suite, mapping_package):
47-
"""Load mapping suite and package into MongoDB for tests that need mapping resources."""
47+
"""Load mapping suite and package into MongoDB for tests that need mapping resources.
48+
49+
Note: Only the mapping_suite is strictly required for normalisation (resources are global),
50+
but we also load mapping_package for other tests that may need it.
51+
"""
4852
mapping_suite_repository = MappingSuiteRepositoryMongoDB(mongodb_client=mongodb_client)
4953
mapping_suite_repository.add(mapping_suite=mapping_suite)
5054
mapping_package_repository = MappingPackageRepositoryMongoDB(mongodb_client=mongodb_client)
5155
mapping_package_repository.add(mapping_package=mapping_package)
5256

5357

5458
@pytest.fixture
55-
def f03_notice_2020(notice_repository, ted_api_end_point, load_mapping_suite_and_package, mapping_package):
59+
def f03_notice_2020(notice_repository, ted_api_end_point, load_mapping_suite_and_package):
5660
notice_search_query = {"query": "ND=408313-2020"}
5761
NoticeFetcher(notice_repository=notice_repository,
5862
ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(),
5963
ted_api_url=ted_api_end_point)).fetch_notices_by_query(
6064
query=notice_search_query)
6165
notice = notice_repository.get(reference="408313-2020")
6266
notice.set_xml_metadata(xml_metadata=XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"]))
63-
notice.mapping_package_identifier = mapping_package.id
6467
return notice
6568

6669
@pytest.fixture
67-
def eForm_notice_2023(notice_repository, ted_api_end_point, load_mapping_suite_and_package, mapping_package):
70+
def eForm_notice_2023(notice_repository, ted_api_end_point, load_mapping_suite_and_package):
6871
notice_search_query = {"query": "ND=17554-2024"}
6972
NoticeFetcher(notice_repository=notice_repository,
7073
ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(),
7174
ted_api_url=ted_api_end_point)).fetch_notices_by_query(
7275
query=notice_search_query)
7376
notice = notice_repository.get(reference="17554-2024")
7477
notice.set_xml_metadata(xml_metadata=XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"]))
75-
notice.mapping_package_identifier = mapping_package.id
7678
return notice
7779

7880
@pytest.fixture
79-
def f18_notice_2022(notice_repository, ted_api_end_point, load_mapping_suite_and_package, mapping_package):
81+
def f18_notice_2022(notice_repository, ted_api_end_point, load_mapping_suite_and_package):
8082
notice_search_query = {"query": "ND=67623-2022"}
8183
NoticeFetcher(notice_repository=notice_repository,
8284
ted_api_adapter=TedAPIAdapter(request_api=TedRequestAPI(),
8385
ted_api_url=ted_api_end_point)).fetch_notices_by_query(
8486
query=notice_search_query)
8587
notice = notice_repository.get(reference="67623-2022")
8688
notice.set_xml_metadata(xml_metadata=XMLMetadata(unique_xpaths=["FAKE_INDEX_XPATHS"]))
87-
notice.mapping_package_identifier = mapping_package.id
8889
return notice
8990

9091

test/features/notice_metadata_processor/conftest.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,15 @@ def notice_eligibility_repository_path():
3333

3434

3535
@pytest.fixture
36-
def normalised_notice(notice_2020, load_mapping_suite_and_package, mapping_package, mongodb_client):
36+
def normalised_notice(notice_2020, load_mapping_suite_and_package, mongodb_client):
3737
notice = notice_2020.copy()
38-
notice.mapping_package_identifier = mapping_package.id
3938
normalise_notice(notice=notice, mongodb_client=mongodb_client)
4039
return notice
4140

4241

4342
@pytest.fixture
44-
def normalised_eForm_notice(indexed_eform_notice_622690, load_mapping_suite_and_package, mapping_package, mongodb_client):
43+
def normalised_eForm_notice(indexed_eform_notice_622690, load_mapping_suite_and_package, mongodb_client):
4544
notice = indexed_eform_notice_622690.copy()
46-
notice.mapping_package_identifier = mapping_package.id
4745
normalise_notice(notice=notice, mongodb_client=mongodb_client)
4846
return notice
4947

@@ -68,12 +66,10 @@ def sample_ef_html_unsafe_notice_path() -> pathlib.Path:
6866
@pytest.fixture
6967
def sample_indexed_ef_html_unsafe_notice(
7068
sample_ef_html_unsafe_notice_path: pathlib.Path,
71-
load_mapping_suite_and_package,
72-
mapping_package) -> Notice:
69+
load_mapping_suite_and_package) -> Notice:
7370
notice: Notice = Notice(ted_id=sample_ef_html_unsafe_notice_path.name)
7471
notice.set_xml_manifestation(
7572
XMLManifestation(object_data=sample_ef_html_unsafe_notice_path.read_text()))
76-
notice.mapping_package_identifier = mapping_package.id
7773

7874
return index_notice(notice)
7975

@@ -86,12 +82,10 @@ def sample_sf_html_unsafe_notice_path() -> pathlib.Path:
8682
@pytest.fixture
8783
def sample_indexed_sf_html_unsafe_notice(
8884
sample_sf_html_unsafe_notice_path: pathlib.Path,
89-
load_mapping_suite_and_package,
90-
mapping_package) -> Notice:
85+
load_mapping_suite_and_package) -> Notice:
9186
notice: Notice = Notice(ted_id=sample_sf_html_unsafe_notice_path.name)
9287
notice.set_xml_manifestation(
9388
XMLManifestation(object_data=sample_sf_html_unsafe_notice_path.read_text()))
94-
notice.mapping_package_identifier = mapping_package.id
9589

9690
return index_notice(notice)
9791

test/unit/notice_metadata_processor/conftest.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ def sample_indexed_ef_html_unsafe_notice(
4040
notice: Notice = Notice(ted_id=sample_ef_html_unsafe_notice_path.name)
4141
notice.set_xml_manifestation(
4242
XMLManifestation(object_data=sample_ef_html_unsafe_notice_path.read_text()))
43-
notice.mapping_package_identifier = "test_package_eforms_sdk1.8"
4443
return index_notice(notice)
4544

4645

@@ -55,7 +54,6 @@ def sample_indexed_sf_html_unsafe_notice(
5554
notice: Notice = Notice(ted_id=sample_sf_html_unsafe_notice_path.name)
5655
notice.set_xml_manifestation(
5756
XMLManifestation(object_data=sample_sf_html_unsafe_notice_path.read_text()))
58-
notice.mapping_package_identifier = "test_package_eforms_sdk1.8"
5957
return index_notice(notice)
6058

6159

0 commit comments

Comments
 (0)