Skip to content

Commit 1647398

Browse files
authored
Merge pull request #536 from OP-TED/feature/TED4-162
updated mapping_suite_hash_digest generation algo on package export
2 parents 45718c7 + d818b5b commit 1647398

104 files changed

Lines changed: 306058 additions & 30 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

ted_sws/data_manager/adapters/mapping_suite_repository.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
MS_STANDARD_METADATA_VERSION_KEY = 'version'
3232
MS_EFORMS_METADATA_VERSION_KEY = 'mapping_version'
3333
MS_METADATA_CONSTRAINTS_KEY = 'metadata_constraints'
34+
MS_METADATA_CONSTRAINTS_START_DATE_KEY = 'start_date'
35+
MS_METADATA_CONSTRAINTS_END_DATE_KEY = 'end_date'
3436
MS_CONSTRAINTS_KEY = 'constraints'
3537
MS_TITLE_KEY = 'title'
3638
MS_HASH_DIGEST_KEY = 'mapping_suite_hash_digest'
@@ -134,6 +136,26 @@ def __init__(self, repository_path: pathlib.Path):
134136
self.repository_path = repository_path
135137
self.repository_path.mkdir(parents=True, exist_ok=True)
136138

139+
def _preprocess_package_metadata(self, package_metadata: dict):
    """
    Normalise the date constraints of a package metadata dictionary in place.

    The method looks up the ``metadata_constraints`` -> ``constraints`` section and,
    for each of the start date and end date entries, wraps a truthy non-list value
    into a single-element list. Missing sections, missing date keys and falsy
    values (e.g. ``null`` in the JSON file) are left untouched.

    :param package_metadata: metadata dictionary of a package; modified in place
    :return:
    """
    metadata_constraints = package_metadata.get(MS_METADATA_CONSTRAINTS_KEY)
    if not isinstance(metadata_constraints, dict):
        # Section absent (or null): nothing to normalise.
        return

    constraints = metadata_constraints.get(MS_CONSTRAINTS_KEY)
    if not isinstance(constraints, dict):
        return

    for date_key in (MS_METADATA_CONSTRAINTS_START_DATE_KEY, MS_METADATA_CONSTRAINTS_END_DATE_KEY):
        # .get() instead of indexing: a package may declare only one of the two
        # dates, and that must not raise KeyError for the other one.
        date_value = constraints.get(date_key)
        if date_value and not isinstance(date_value, list):
            constraints[date_key] = [date_value]
158+
137159
def _read_package_metadata(self, package_path: pathlib.Path) -> dict:
138160
"""
139161
This method allows reading the metadata of a packet.
@@ -143,6 +165,7 @@ def _read_package_metadata(self, package_path: pathlib.Path) -> dict:
143165
package_metadata_path = package_path / MS_METADATA_FILE_NAME
144166
package_metadata_content = package_metadata_path.read_text(encoding="utf-8")
145167
package_metadata = json.loads(package_metadata_content)
168+
self._preprocess_package_metadata(package_metadata)
146169
return package_metadata
147170

148171
def _read_transformation_rule_set(self, package_path: pathlib.Path) -> TransformationRuleSet:
@@ -346,8 +369,8 @@ def _read_mapping_suite_package(self, mapping_suite_identifier: str) -> Optional
346369
package_path = self.repository_path / mapping_suite_identifier
347370
if package_path.is_dir():
348371
package_metadata = self._read_package_metadata(package_path)
349-
if MS_MAPPING_TYPE_KEY in package_metadata and package_metadata[
350-
MS_MAPPING_TYPE_KEY] == MappingSuiteType.ELECTRONIC_FORMS:
372+
if (MS_MAPPING_TYPE_KEY in package_metadata and
373+
package_metadata[MS_MAPPING_TYPE_KEY] == MappingSuiteType.ELECTRONIC_FORMS):
351374
package_metadata[MS_METADATA_CONSTRAINTS_KEY] = MetadataConstraints(
352375
constraints=MetadataConstraintsEform(
353376
**package_metadata[MS_METADATA_CONSTRAINTS_KEY][MS_CONSTRAINTS_KEY]))
@@ -363,9 +386,7 @@ def _read_mapping_suite_package(self, mapping_suite_identifier: str) -> Optional
363386
mapping_suite_hash_digest=package_metadata[MS_HASH_DIGEST_KEY],
364387
mapping_type=package_metadata[
365388
MS_MAPPING_TYPE_KEY] if MS_MAPPING_TYPE_KEY in package_metadata else MappingSuiteType.STANDARD_FORMS,
366-
version=package_metadata[
367-
MS_STANDARD_METADATA_VERSION_KEY] if MS_STANDARD_METADATA_VERSION_KEY in package_metadata else \
368-
package_metadata[MS_EFORMS_METADATA_VERSION_KEY],
389+
version=mapping_suite_read_version_from_metadata(package_metadata),
369390
identifier=package_metadata[
370391
MS_METADATA_IDENTIFIER_KEY] if MS_METADATA_IDENTIFIER_KEY in package_metadata else mapping_suite_identifier,
371392
transformation_rule_set=self._read_transformation_rule_set(package_path),
@@ -421,3 +442,9 @@ def clear_repository(self):
421442
:return:
422443
"""
423444
shutil.rmtree(self.repository_path)
445+
446+
447+
def mapping_suite_read_version_from_metadata(metadata: dict) -> str:
    """
    Read the mapping suite version from a package metadata dictionary.

    When the metadata declares the mapping type MappingSuiteType.ELECTRONIC_FORMS
    the version is looked up under MS_EFORMS_METADATA_VERSION_KEY; in every other
    case (including a missing mapping type) MS_STANDARD_METADATA_VERSION_KEY is used.

    :param metadata: package metadata dictionary
    :return: the value stored under the selected key, or None when that key is absent
    """
    declares_eforms = (
        MS_MAPPING_TYPE_KEY in metadata
        and metadata[MS_MAPPING_TYPE_KEY] == MappingSuiteType.ELECTRONIC_FORMS
    )
    if declares_eforms:
        return metadata.get(MS_EFORMS_METADATA_VERSION_KEY)
    return metadata.get(MS_STANDARD_METADATA_VERSION_KEY)

ted_sws/mapping_suite_processor/adapters/mapping_suite_hasher.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,35 @@
77

88
""" """
99
import hashlib
10+
import json
1011
import pathlib
1112
import re
1213
from typing import Tuple, List, Union
1314

15+
from ted_sws.core.model.transform import MappingSuiteType
1416
from ted_sws.data_manager.adapters.mapping_suite_repository import MS_TRANSFORM_FOLDER_NAME, \
15-
MS_CONCEPTUAL_MAPPING_FILE_NAME, MS_MAPPINGS_FOLDER_NAME, MS_RESOURCES_FOLDER_NAME
17+
MS_MAPPINGS_FOLDER_NAME, MS_RESOURCES_FOLDER_NAME, MS_CONCEPTUAL_MAPPING_FILE_NAME, MS_MAPPING_TYPE_KEY
18+
from ted_sws.mapping_suite_processor.model.mapping_suite_metadata import EFormsPackageMetadataBase
1619

1720

1821
class MappingSuiteHasher:
1922
"""
2023
2124
"""
2225

23-
def __init__(self, mapping_suite_path: Union[pathlib.Path, str]):
24-
self.mapping_suite_path = pathlib.Path(mapping_suite_path)
26+
def __init__(self, mapping_suite_path: Union[pathlib.Path, str], mapping_suite_metadata: Union[dict, None] = None):
    """
    Set up a hasher for the mapping suite located at the given path.

    :param mapping_suite_path: location of the mapping suite package; a plain string is
        accepted and converted, because the hashing methods build sub-paths with the
        ``/`` operator
    :param mapping_suite_metadata: optional package metadata dictionary; when it describes
        an eForms mapping suite it is passed through EFormsPackageMetadataBase and stored
        as the resulting plain dictionary
    """
    # Coerce to Path so that callers passing a str keep working.
    self.mapping_suite_path = pathlib.Path(mapping_suite_path)
    self.mapping_suite_metadata = mapping_suite_metadata

    if self.is_for_eforms():
        # NOTE(review): presumably this restricts the hashed metadata to the fields
        # declared on the model (so the stored digest is not hashed itself) -- confirm.
        self.mapping_suite_metadata = EFormsPackageMetadataBase(**mapping_suite_metadata).dict()
32+
33+
def is_for_eforms(self) -> bool:
    """
    Tell whether the metadata given to this hasher describes an eForms mapping suite.

    :return: True only when metadata is present and its mapping type entry equals
        MappingSuiteType.ELECTRONIC_FORMS; False otherwise (including when no
        metadata, or empty metadata, was supplied)
    """
    metadata = self.mapping_suite_metadata
    if not metadata:
        # Return a real boolean rather than leaking the falsy None / {} object.
        return False
    # .get() yields None for a missing key, which never equals the enum member.
    return bool(metadata.get(MS_MAPPING_TYPE_KEY) == MappingSuiteType.ELECTRONIC_FORMS)
2539

2640
def hash_critical_mapping_files(self) -> List[Tuple[str, str]]:
2741
"""
@@ -43,17 +57,19 @@ def _hash_a_file(file_path: pathlib.Path) -> Tuple[str, str]:
4357
relative_path = str(file_path).replace(str(self.mapping_suite_path), "")
4458
return relative_path, hashed_line
4559

46-
files_to_hash = [
60+
files_to_hash = [] if self.is_for_eforms() else [
4761
self.mapping_suite_path / MS_TRANSFORM_FOLDER_NAME / MS_CONCEPTUAL_MAPPING_FILE_NAME,
4862
]
4963

50-
mapping_files = filter(lambda item: item.is_file(),
51-
(self.mapping_suite_path / MS_TRANSFORM_FOLDER_NAME /
52-
MS_MAPPINGS_FOLDER_NAME).iterdir())
64+
mapping_files = filter(
65+
lambda item: item.is_file(),
66+
(self.mapping_suite_path / MS_TRANSFORM_FOLDER_NAME / MS_MAPPINGS_FOLDER_NAME).iterdir()
67+
)
5368

54-
mapping_resource_files = filter(lambda item: item.is_file(),
55-
(self.mapping_suite_path / MS_TRANSFORM_FOLDER_NAME /
56-
MS_RESOURCES_FOLDER_NAME).iterdir())
69+
mapping_resource_files = filter(
70+
lambda item: item.is_file(),
71+
(self.mapping_suite_path / MS_TRANSFORM_FOLDER_NAME / MS_RESOURCES_FOLDER_NAME).iterdir()
72+
)
5773

5874
files_to_hash += mapping_files
5975
files_to_hash += mapping_resource_files
@@ -62,6 +78,11 @@ def _hash_a_file(file_path: pathlib.Path) -> Tuple[str, str]:
6278
result.sort(key=lambda x: x[0])
6379
return result
6480

81+
def hash_mapping_metadata(self) -> str:
    """
    Compute the SHA-256 hex digest of the JSON-serialised mapping suite metadata.

    The metadata is serialised with ``json.dumps`` using its default settings
    (key order is the dictionary's own order) and encoded as UTF-8 before hashing.

    :return: hexadecimal digest string
    """
    serialized_metadata = json.dumps(self.mapping_suite_metadata)
    digest = hashlib.sha256()
    digest.update(serialized_metadata.encode('utf-8'))
    return digest.hexdigest()
85+
6586
def hash_mapping_suite(self, with_version: str = "") -> str:
6687
"""
6788
Returns a hash of the mapping suite.
@@ -74,6 +95,11 @@ def hash_mapping_suite(self, with_version: str = "") -> str:
7495
"""
7596
list_of_hashes = self.hash_critical_mapping_files()
7697
signatures = [signature[1] for signature in list_of_hashes]
98+
99+
if self.is_for_eforms():
100+
signatures.append(self.hash_mapping_metadata())
101+
77102
if with_version:
78103
signatures += with_version
104+
79105
return hashlib.sha256(str.encode(",".join(signatures))).hexdigest()

ted_sws/mapping_suite_processor/adapters/mapping_suite_structure_checker.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
1-
import json
21
import pathlib
32
from typing import List, Union
43

5-
from ted_sws.core.model.transform import MetadataConstraints
64
from ted_sws.data_manager.adapters.mapping_suite_repository import MS_TRANSFORM_FOLDER_NAME, MS_TEST_DATA_FOLDER_NAME, \
75
MS_CONCEPTUAL_MAPPING_FILE_NAME, MS_RESOURCES_FOLDER_NAME, MS_MAPPINGS_FOLDER_NAME, MS_METADATA_FILE_NAME, \
8-
MS_VALIDATE_FOLDER_NAME, MS_SPARQL_FOLDER_NAME, MS_SHACL_FOLDER_NAME, MS_OUTPUT_FOLDER_NAME, MS_TEST_SUITE_REPORT
6+
MS_VALIDATE_FOLDER_NAME, MS_SPARQL_FOLDER_NAME, MS_SHACL_FOLDER_NAME, MS_OUTPUT_FOLDER_NAME, MS_TEST_SUITE_REPORT, \
7+
mapping_suite_read_version_from_metadata
98
from ted_sws.event_manager.model.event_message import EventMessage, EventMessageLogSettings
109
from ted_sws.event_manager.services.logger_from_context import get_console_logger
1110
from ted_sws.mapping_suite_processor.adapters.mapping_suite_hasher import MappingSuiteHasher
1211
from ted_sws.mapping_suite_processor.services.mapping_suite_reader import mapping_suite_read_metadata, \
13-
MAPPING_SUITE_HASH, VERSION_KEY
12+
MAPPING_SUITE_HASH
1413

1514
SHACL_KEYWORD = "shacl"
1615
SPARQL_KEYWORD = "sparql"
@@ -143,19 +142,20 @@ def check_for_changes_by_version(self) -> bool:
143142
settings=self.log_settings)
144143
success = True
145144

146-
metadata = mapping_suite_read_metadata(mapping_suite_path=self.mapping_suite_path)
145+
mapping_suite_metadata = mapping_suite_read_metadata(mapping_suite_path=self.mapping_suite_path)
146+
version = mapping_suite_read_version_from_metadata(mapping_suite_metadata)
147147

148-
version = metadata.get(VERSION_KEY)
148+
mapping_suite_versioned_hash = MappingSuiteHasher(
149+
mapping_suite_path=self.mapping_suite_path,
150+
mapping_suite_metadata=mapping_suite_metadata
151+
).hash_mapping_suite(with_version=version)
149152

150-
mapping_suite_versioned_hash = MappingSuiteHasher(self.mapping_suite_path).hash_mapping_suite(
151-
with_version=version)
152-
153-
if mapping_suite_versioned_hash != metadata.get(MAPPING_SUITE_HASH):
153+
if mapping_suite_versioned_hash != mapping_suite_metadata.get(MAPPING_SUITE_HASH):
154154
self.logger.error(
155155
event_message=EventMessage(
156156
message=f'The Mapping Suite hash digest ({mapping_suite_versioned_hash}) '
157157
f'does not correspond to the one in the metadata.json file '
158-
f'({metadata.get(MAPPING_SUITE_HASH)}.'
158+
f'({mapping_suite_metadata.get(MAPPING_SUITE_HASH)}.'
159159
),
160160
settings=self.log_settings
161161
)

ted_sws/mapping_suite_processor/model/__init__.py

Whitespace-only changes.
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from typing import Optional
2+
3+
from pydantic import BaseModel
4+
5+
from ted_sws.core.model.transform import MappingSuiteType, MetadataConstraints
6+
7+
8+
class EFormsPackageMetadataBase(BaseModel):
    """
    Pydantic model listing the metadata fields of an eForms mapping suite package.

    Field order is kept exactly as declared: the dictionary produced from this
    model is JSON-serialised elsewhere, so reordering fields would change that output.
    """

    # Free-text descriptive fields of the package.
    identifier: str
    title: str
    created_at: str
    description: str

    # Version information.
    mapping_version: str
    ontology_version: str

    # Mapping type; defaults to the eForms type when not supplied.
    mapping_type: Optional[MappingSuiteType] = MappingSuiteType.ELECTRONIC_FORMS

    # Constraints section of the package metadata.
    metadata_constraints: MetadataConstraints

    class Config:
        # Store enum members as their underlying values.
        use_enum_values = True

ted_sws/mapping_suite_processor/services/mapping_suite_reader.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
from pathlib import Path
22
from typing import Dict
33

4+
from ted_sws.core.model.transform import MappingSuiteType
45
from ted_sws.mapping_suite_processor.adapters.mapping_suite_reader import MappingSuiteReader
56

6-
VERSION_KEY = "version"
7+
STANDARD_FORM_VERSION_KEY = "version"
8+
EFORM_VERSION_KEY = "mapping_version"
9+
MAPPING_TYPE_KEY = "mapping_type"
710
MAPPING_SUITE_HASH = "mapping_suite_hash_digest"
811

912

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{
2+
"identifier": "package_eforms_10-24_v1.9",
3+
"title": "Package EF10-EF24, SDK v1.9",
4+
"created_at": "2024-04-24 17:07:38.786702",
5+
"description": "This is the conceptual mapping for eForms subtype 10-24 SDK version 1.9",
6+
"mapping_version": "3.0.0-alpha.3",
7+
"ontology_version": "4.0.0",
8+
"mapping_type": "eforms",
9+
"metadata_constraints": {
10+
"constraints": {
11+
"eforms_subtype": [
12+
"10",
13+
"11",
14+
"12",
15+
"13",
16+
"14",
17+
"15",
18+
"16",
19+
"17",
20+
"18",
21+
"19",
22+
"20",
23+
"21",
24+
"22",
25+
"23",
26+
"24"
27+
],
28+
"start_date": null,
29+
"end_date": null,
30+
"eforms_sdk_versions": [
31+
"1.9"
32+
]
33+
}
34+
},
35+
"mapping_suite_hash_digest": "8940944c0f7e7f5761ab52d09a4e9ad51bee5cd150736f528c8d743e27ea2aaa"
36+
}

tests/test_data/mapping_suite_processor/mappings/package_eforms/output/cn_sample_2022_10/change-cn_24_cumbria_suppliers/change-cn_24_cumbria_suppliers.ttl

Whitespace-only changes.

tests/test_data/mapping_suite_processor/mappings/package_eforms/output/cn_sample_2022_10/change-cn_24_cumbria_suppliers/test_suite_report/report3

Whitespace-only changes.

tests/test_data/mapping_suite_processor/mappings/package_eforms/output/cn_sample_2022_10/change-cn_24_cumbria_suppliers/test_suite_report/shacl

Whitespace-only changes.

0 commit comments

Comments
 (0)