Skip to content

Commit d950637

Browse files
committed
feat: add post model validator to populate legacy fields and vice-versa
This is a transitional feature to support pre-existing code: while we may load packages with the MSSDK model and cast them to the extended native model, certain fields in the native model would otherwise remain unpopulated. We also add support for the inverse situation, as some pre-existing code may still rely on the legacy file-system-based reading and, especially, writing, which has to be moved over to use MSSDK services. For both situations we rely solely on one key characteristic of the package, namely the presence or absence of metadata. Caveats: - The objects will be larger; the impact on production is yet to be seen - Objects written to the file system and read back will not match
1 parent afcfa8d commit d950637

1 file changed

Lines changed: 228 additions & 8 deletions

File tree

src/ted_sws/core/model/transform.py

Lines changed: 228 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,34 @@
55
# Author: Eugeniu Costetchi
66
# Email: costezki.eugen@gmail.com
77

8-
""" """
98
import abc
109
from datetime import datetime
1110
from enum import Enum
11+
from pathlib import Path
1212
from typing import List, Optional, Union
1313

14-
from pydantic import field_validator, ConfigDict, Field
14+
from pydantic import field_validator, ConfigDict, Field, model_validator
1515

1616
from src.ted_sws.core.model import PropertyBaseModel
1717

1818
from mapping_suite_sdk.mapping_package_v2.models import MappingPackageV2
19-
from mapping_suite_sdk.core.models.collection_asset import TestDataCollectionAsset, SPARQLTestCollectionAsset, SHACLTestCollectionAsset, \
20-
TestResultCollectionAsset
19+
from mapping_suite_sdk.core.models.collection_asset import (
20+
TestDataCollectionAsset,
21+
SPARQLTestCollectionAsset,
22+
SHACLTestCollectionAsset,
23+
TestResultCollectionAsset,
24+
TechnicalMappingCollectionAsset,
25+
VocabularyMappingCollectionAsset,
26+
)
27+
from mapping_suite_sdk.core.models.file_asset import (
28+
RMLMappingFileAsset,
29+
VocabularyMappingFileAsset,
30+
)
31+
from mapping_suite_sdk.mapping_package_v2.models.mapping_package_v2_metadata import (
32+
MappingPackageV2Metadata,
33+
MappingPackageV2Constraints,
34+
MappingPackageV2EligibilityConstraints,
35+
)
2136

2237
class MappingPackageComponent(PropertyBaseModel, abc.ABC):
2338
model_config = ConfigDict(validate_assignment=True)
@@ -107,12 +122,27 @@ def __str__(self):
107122
class MappingPackage(MappingPackageComponent, MappingPackageV2):
108123
"""
109124
Extended mapping package model that inherits from an MSSDK model.
110-
125+
111126
Combines compatibility with MSSDK version 2 while adding legacy pipeline-specific fields.
112-
127+
113128
IMPORTANT: Many legacy fields are optional with defaults to avoid conflicts with MSSDK models.
114129
"""
115-
130+
131+
# Override MSSDK v2 required fields to make them optional (will be auto-populated)
132+
# this is mostly a transitional solution for tests; packages are loaded and validated with pure MSSDK models first
133+
technical_mapping_suite: Optional[TechnicalMappingCollectionAsset] = Field(
134+
default=None,
135+
description="RML mapping files containing the technical mapping rules/definitions"
136+
)
137+
vocabulary_mapping_suite: Optional[VocabularyMappingCollectionAsset] = Field(
138+
default=None,
139+
description="Vocabulary resources used by mapping rules in XML, JSON or CSV format"
140+
)
141+
metadata: Optional[MappingPackageV2Metadata] = Field(
142+
default=None,
143+
description="Package metadata containing general information"
144+
)
145+
116146
# Legacy pipeline-specific fields - MOSTLY OPTIONAL
117147
created_at: str = Field(
118148
default_factory=lambda: datetime.now().replace(microsecond=0).isoformat()
@@ -134,7 +164,7 @@ class MappingPackage(MappingPackageComponent, MappingPackageV2):
134164
# TODO fix to be forwarded to MSSDK, remove when implemented there
135165
# Override large/optional collection assets in MSSDK model
136166
test_results: Optional[TestResultCollectionAsset] = Field(
137-
default=None,
167+
default=None,
138168
description="Collections of test transformation results (optional due to large storage requirements -- will cause MongoDB BSON error for 16MB limit)"
139169
)
140170
test_data_suites: List[TestDataCollectionAsset] = Field(
@@ -150,6 +180,196 @@ class MappingPackage(MappingPackageComponent, MappingPackageV2):
150180
description="Collections of SHACL-based validation test suites"
151181
)
152182

183+
@model_validator(mode='after')
def sync_legacy_and_mssdk_fields(self) -> 'MappingPackage':
    """
    Keep the legacy pipeline fields and the MSSDK v2 fields in step.

    The presence of ``metadata`` decides the direction of the synchronisation:

    * ``metadata`` is ``None``: the MSSDK v2 fields are derived from the
      legacy fields (see ``_populate_mssdk_from_legacy``);
    * ``metadata`` is set and ``identifier`` still holds ``"no_id"``: the
      legacy fields are derived from the MSSDK v2 fields
      (see ``_populate_legacy_from_mssdk``).

    In every other case the instance is left untouched.

    :return: this same instance, as required from an ``after`` model validator
    """
    # FIXME: transitional solution for code paths where the legacy
    #  file-system package parsing is still used (no MSSDK metadata loaded).
    if self.metadata is None:
        self._populate_mssdk_from_legacy()
        return self

    # From here on metadata is known to be present; only back-fill the legacy
    # fields when the identifier has not been given a real value yet.
    if self.identifier == "no_id":
        self._populate_legacy_from_mssdk()

    return self
204+
205+
def _populate_mssdk_from_legacy(self) -> None:
    """Populate MSSDK v2 required fields from legacy pipeline fields.

    Only fields that are still ``None`` are filled in:

    * ``technical_mapping_suite`` from ``transformation_rule_set.rml_mapping_rules``;
    * ``vocabulary_mapping_suite`` from ``transformation_rule_set.resources``;
    * ``metadata`` from the scalar legacy fields and ``metadata_constraints``.

    When a legacy source is absent or empty, placeholder content is used so
    that the MSSDK v2 fields are still populated.
    """
    # technical_mapping_suite from transformation_rule_set
    if self.technical_mapping_suite is None:
        if self.transformation_rule_set and self.transformation_rule_set.rml_mapping_rules:
            rml_files = [
                RMLMappingFileAsset(
                    path=Path(f"transformation/mappings/{rule.file_name}"),
                    content=rule.file_content,
                )
                for rule in self.transformation_rule_set.rml_mapping_rules
            ]
        else:
            # Provide minimal dummy data to satisfy MSSDK v2 requirements
            rml_files = [
                RMLMappingFileAsset(
                    path=Path("transformation/mappings/mapping.rml.ttl"),
                    content="# Placeholder RML mapping",
                )
            ]
        self.technical_mapping_suite = TechnicalMappingCollectionAsset(
            path=Path("transformation/mappings"),
            files=rml_files,
        )

    # vocabulary_mapping_suite from transformation_rule_set.resources
    if self.vocabulary_mapping_suite is None:
        if self.transformation_rule_set and self.transformation_rule_set.resources:
            vocabulary_files = [
                VocabularyMappingFileAsset(
                    path=Path(f"resources/{res.file_name}"),
                    content=res.file_content,
                )
                for res in self.transformation_rule_set.resources
            ]
        else:
            # Provide minimal dummy data to satisfy MSSDK v2 requirements
            vocabulary_files = [
                VocabularyMappingFileAsset(
                    path=Path("resources/vocabulary.xml"),
                    content="<dummy>vocabulary content</dummy>",
                )
            ]
        self.vocabulary_mapping_suite = VocabularyMappingCollectionAsset(
            path=Path("resources"),
            files=vocabulary_files,
        )

    # metadata from legacy fields
    if self.metadata is None:
        # Extract constraints for eligibility; the two legacy constraint forms
        # differ only in where the version list comes from.
        if self.metadata_constraints:
            constraints_data = self.metadata_constraints.constraints
            eforms_subtype = constraints_data.eforms_subtype
            start_date = constraints_data.start_date
            end_date = constraints_data.end_date
            if isinstance(constraints_data, MetadataConstraintsStandardForm):
                # Map min_xsd to sdk_versions
                sdk_versions = constraints_data.min_xsd_version
            else:  # MetadataConstraintsEform
                sdk_versions = constraints_data.eforms_sdk_versions
        else:
            # Default constraints
            eforms_subtype = ["0"]
            start_date = None
            end_date = None
            sdk_versions = ["unknown"]

        eligibility_constraints = MappingPackageV2EligibilityConstraints(
            constraints=MappingPackageV2Constraints(
                eforms_subtype=eforms_subtype,
                start_date=start_date,
                end_date=end_date,
                eforms_sdk_versions=sdk_versions,
            )
        )

        # str() on an Enum member yields "ClassName.MEMBER", not its value,
        # while the reverse sync compares metadata.type against the plain
        # string "eforms". Use the member's value so the round trip holds.
        # NOTE(review): assumes MappingPackageType values are the MSSDK type
        # strings ("eforms" / "standard_forms") -- confirm against the enum.
        legacy_type = self.mapping_type
        if not legacy_type:
            package_type = "standard_forms"
        elif isinstance(legacy_type, Enum):
            package_type = str(legacy_type.value)
        else:
            package_type = str(legacy_type)

        self.metadata = MappingPackageV2Metadata(
            path=Path("metadata.json"),
            identifier=self.identifier if self.identifier != "no_id" else "unknown",
            title=self.title if self.title != "no_title" else "Unknown Package",
            issue_date=self.created_at,
            description=f"Mapping package {self.identifier}",
            mapping_version=self.version,
            ontology_version=self.ontology_version,
            type=package_type,
            eligibility_constraints=eligibility_constraints,
            signature=self.mapping_suite_hash_digest if self.mapping_suite_hash_digest else "",
        )
303+
304+
def _populate_legacy_from_mssdk(self) -> None:
    """Fill the legacy pipeline fields using the MSSDK v2 fields as the source.

    Scalar legacy fields and ``metadata_constraints`` are copied from
    ``metadata``; ``transformation_rule_set`` is rebuilt from the technical and
    vocabulary mapping suites when it is missing or has no RML mapping rules.
    """

    def to_file_resources(suite) -> list:
        """Convert the files of an MSSDK collection asset to ``FileResource`` objects."""
        if not suite:
            return []
        return [
            FileResource(
                file_name=asset.path.name,
                file_content=asset.content,
                original_name=asset.path.name
            )
            for asset in suite.files
        ]

    package_metadata = self.metadata
    if package_metadata:
        is_eforms = package_metadata.type == "eforms"

        # Basic legacy fields. The identifier is assigned first, as in the
        # original statement order.
        self.identifier = package_metadata.identifier
        self.title = package_metadata.title
        self.created_at = package_metadata.issue_date
        self.version = package_metadata.mapping_version
        self.ontology_version = package_metadata.ontology_version
        self.mapping_suite_hash_digest = package_metadata.signature
        if is_eforms:
            self.mapping_type = MappingPackageType.ELECTRONIC_FORMS
        else:
            self.mapping_type = MappingPackageType.STANDARD_FORMS

        # Legacy metadata_constraints mirror the MSSDK eligibility constraints;
        # only the constraint class and the version field name differ.
        eligibility = package_metadata.eligibility_constraints.constraints
        if is_eforms:
            legacy_constraints = MetadataConstraintsEform(
                eforms_subtype=eligibility.eforms_subtype,
                start_date=eligibility.start_date,
                end_date=eligibility.end_date,
                eforms_sdk_versions=eligibility.eforms_sdk_versions
            )
        else:
            legacy_constraints = MetadataConstraintsStandardForm(
                eforms_subtype=eligibility.eforms_subtype,
                start_date=eligibility.start_date,
                end_date=eligibility.end_date,
                min_xsd_version=eligibility.eforms_sdk_versions,
                max_xsd_version=None
            )
        self.metadata_constraints = MetadataConstraints(constraints=legacy_constraints)

    # NOTE(review): assumed to run independently of the metadata check above;
    # the only visible caller guarantees metadata is set -- confirm nesting.
    existing_rule_set = self.transformation_rule_set
    if existing_rule_set and existing_rule_set.rml_mapping_rules:
        # Legacy rules are already present; leave them alone.
        return

    rml_rules = to_file_resources(self.technical_mapping_suite)
    resources = to_file_resources(self.vocabulary_mapping_suite)
    self.transformation_rule_set = TransformationRuleSet(
        resources=resources,
        rml_mapping_rules=rml_rules
    )
372+
153373
# TODO check this out and remove if not needed (see if any production package ID does not come with version)
154374
def get_mongodb_id(self) -> str:
155375
"""Get MongoDB _id for this package."""

0 commit comments

Comments
 (0)