Skip to content

Commit abe6e4a

Browse files
committed
feat: support for all package versions through unified v3 model
Use the one-stop MSSDK service to detect, convert and load packages of any given version, normalizing to a unified "v3". This also yields support for v3L (aka lightweight), since v3 is a superset (the lightweight variant excludes all data except bare transformation necessities). Standard Forms and eForms are henceforth "v1" and "v2", respectively.

The pipeline native model is now an MSSDK v3-extended one, with the JSONLD being the canonical metadata model. Not only is there no equivalent in older models for this and the accompanying `context.jsonld`, but the datetime datatype also needs special handling/conversion when used in legacy contexts.

A key distinguishing feature of the new unified package is the complete refactor of the constraints model, removing one level of nesting but also adding more structure and possibilities with one model (like a range of document schema versions as seen in v1, or a list of such as seen in v2). Repurposing these constraints for legacy contexts therefore needs extra care, if not a complete refactoring of the legacy code.

Recap of model differences from v1/v2 to v3:

- `identifier` -> `id`
- `issue_date (str)` -> `created_at (datetime)`
- `ontology_version` -> `model_version`
- `metadata_constraints.constraints` -> `applicability_constraints`
- `eforms_subtype` -> `document_type_list`
- `start_date/end_date` -> `document_time_interval.start/end`
- `min/max_xsd_versions` -> `document_version_range.min/max`
- `eforms_sdk_versions` -> `document_schema_version_list`

Note that _applicability constraints_ is a package perspective -- the same constraints are to be interpreted by the pipeline as _eligibility constraints_ for a notice.

There is an additional transitional field `project_identifier`, which stands in for the `mapping_type`, but only barely. This interpretation may be deprecated at any point, but not before support is added for alternative detection mechanisms.
1 parent 75ba112 commit abe6e4a

6 files changed

Lines changed: 154 additions & 95 deletions

File tree

src/ted_sws/core/model/transform.py

Lines changed: 129 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@
1414
from pydantic import field_validator, ConfigDict, Field, model_validator
1515

1616
from src.ted_sws.core.model import PropertyBaseModel
17+
from src.ted_sws.event_manager.services.log import log_technical_warning
1718

18-
from mapping_suite_sdk.mapping_package_v2.models import MappingPackageV2
19+
from mapping_suite_sdk.mapping_package_v3.models import MappingPackageV3
1920
from mapping_suite_sdk.core.models.collection_asset import (
2021
TestDataCollectionAsset,
2122
SPARQLTestCollectionAsset,
@@ -29,10 +30,13 @@
2930
VocabularyMappingFileAsset,
3031
TestDataFileAsset,
3132
)
32-
from mapping_suite_sdk.mapping_package_v2.models.mapping_package_v2_metadata import (
33-
MappingPackageV2Metadata,
34-
MappingPackageV2Constraints,
35-
MappingPackageV2EligibilityConstraints,
33+
from mapping_suite_sdk.mapping_package_v3.models.mapping_package_v3_metadata_jsonld import (
34+
MappingPackageV3MetadataJSONLD,
35+
)
36+
from mapping_suite_sdk.mapping_package_v3.models.mapping_package_v3_metadata import (
37+
ApplicabilityConstraints,
38+
DateTimeInterval,
39+
VersionRange,
3640
)
3741

3842
class MappingPackageComponent(PropertyBaseModel, abc.ABC):
@@ -120,11 +124,11 @@ def __str__(self):
120124

121125

122126
# this will become a union- or composition-based class when more versions are added
123-
class MappingPackage(MappingPackageComponent, MappingPackageV2):
127+
class MappingPackage(MappingPackageComponent, MappingPackageV3):
124128
"""
125129
Extended mapping package model that inherits from an MSSDK model.
126130
127-
Combines compatibility with MSSDK version 2 while adding legacy pipeline-specific fields.
131+
Combines compatibility with MSSDK version 3 (Unified) while adding legacy pipeline-specific fields.
128132
129133
IMPORTANT: Many legacy fields are optional with defaults to avoid conflicts with MSSDK models.
130134
"""
@@ -139,9 +143,9 @@ class MappingPackage(MappingPackageComponent, MappingPackageV2):
139143
default=None,
140144
description="Vocabulary resources used by mapping rules in XML, JSON or CSV format"
141145
)
142-
metadata: Optional[MappingPackageV2Metadata] = Field(
146+
metadata: Optional[MappingPackageV3MetadataJSONLD] = Field(
143147
default=None,
144-
description="Package metadata containing general information"
148+
description="Package metadata containing general information (V3 unified format)"
145149
)
146150

147151
# Legacy pipeline-specific fields - MOSTLY OPTIONAL
@@ -184,32 +188,32 @@ class MappingPackage(MappingPackageComponent, MappingPackageV2):
184188
@model_validator(mode='after')
185189
def sync_legacy_and_mssdk_fields(self) -> 'MappingPackage':
186190
"""
187-
Automatically synchronize between legacy pipeline fields and MSSDK v2 fields.
191+
Automatically synchronize between legacy pipeline fields and MSSDK v3 fields.
188192
189-
Populates MSSDK v2 required fields from legacy fields when missing,
193+
Populates MSSDK v3 required fields from legacy fields when missing,
190194
or vice versa for backward compatibility.
191195
192196
This ensures the model works with both old code using legacy fields
193-
and new code using MSSDK v2 structure.
197+
and new code using MSSDK v3 structure.
194198
Prevents infinite recursion by using a private _sync_done flag.
195199
"""
196200
if getattr(self, "_sync_done", False):
197201
return self
198202
setattr(self, "_sync_done", True)
199203

200-
# If MSSDK v2 fields are missing but legacy fields exist, populate from legacy
204+
# If MSSDK v3 fields are missing but legacy fields exist, populate from legacy
201205
# FIXME: this is a transitional solution for code where the legacy file system package parsing is done
202206
if self.metadata is None:
203207
self._populate_mssdk_from_legacy()
204208

205-
# If legacy fields are defaults but MSSDK v2 fields exist, populate from MSSDK
209+
# If legacy fields are defaults but MSSDK v3 fields exist, populate from MSSDK
206210
elif self.identifier == "no_id" and self.metadata is not None:
207211
self._populate_legacy_from_mssdk()
208212

209213
return self
210214

211215
def _populate_mssdk_from_legacy(self) -> None:
212-
"""Populate MSSDK v2 required fields from legacy pipeline fields."""
216+
"""Populate MSSDK v3 required fields from legacy pipeline fields."""
213217
# technical_mapping_suite from transformation_rule_set
214218
if self.technical_mapping_suite is None:
215219
if self.transformation_rule_set and self.transformation_rule_set.rml_mapping_rules:
@@ -224,7 +228,7 @@ def _populate_mssdk_from_legacy(self) -> None:
224228
]
225229
)
226230
else:
227-
# Provide minimal dummy data to satisfy MSSDK v2 requirements
231+
# Provide minimal dummy data to satisfy MSSDK v3 requirements
228232
self.technical_mapping_suite = TechnicalMappingCollectionAsset(
229233
path=Path("transformation/mappings"),
230234
files=[
@@ -249,7 +253,7 @@ def _populate_mssdk_from_legacy(self) -> None:
249253
]
250254
)
251255
else:
252-
# Provide minimal dummy data to satisfy MSSDK v2 requirements
256+
# Provide minimal dummy data to satisfy MSSDK v3 requirements
253257
self.vocabulary_mapping_suite = VocabularyMappingCollectionAsset(
254258
path=Path("resources"),
255259
files=[
@@ -260,97 +264,142 @@ def _populate_mssdk_from_legacy(self) -> None:
260264
]
261265
)
262266

263-
# metadata from legacy fields
267+
# metadata from legacy fields (V3 format)
264268
if self.metadata is None:
265-
# Extract constraints for eligibility
269+
# Build applicability constraints for V3 format
270+
applicability_constraints = None
266271
if self.metadata_constraints:
267272
constraints_data = self.metadata_constraints.constraints
273+
# Build document_time_interval from start_date/end_date if available
274+
document_time_interval = None
275+
if constraints_data.start_date or constraints_data.end_date:
276+
start_dt = None
277+
end_dt = None
278+
if constraints_data.start_date and constraints_data.start_date[0]:
279+
try:
280+
start_dt = datetime.fromisoformat(constraints_data.start_date[0])
281+
except (ValueError, IndexError):
282+
log_technical_warning(message=f"Ignoring invalid start_date value in metadata constraints (unable to parse as ISO format): {constraints_data.start_date[0]}")
283+
if constraints_data.end_date and constraints_data.end_date[0]:
284+
try:
285+
end_dt = datetime.fromisoformat(constraints_data.end_date[0])
286+
except (ValueError, IndexError):
287+
log_technical_warning(message=f"Ignoring invalid end_date value in metadata constraints (unable to parse as ISO format): {constraints_data.end_date[0]}")
288+
if start_dt or end_dt:
289+
document_time_interval = DateTimeInterval(start=start_dt, end=end_dt)
290+
268291
if isinstance(constraints_data, MetadataConstraintsStandardForm):
269-
eligibility_constraints = MappingPackageV2EligibilityConstraints(
270-
constraints=MappingPackageV2Constraints(
271-
eforms_subtype=constraints_data.eforms_subtype,
272-
start_date=constraints_data.start_date,
273-
end_date=constraints_data.end_date,
274-
eforms_sdk_versions=constraints_data.min_xsd_version # Map min_xsd to sdk_versions
275-
)
292+
# Standard forms: use min_xsd_version as schema version list
293+
version_range = VersionRange(
294+
min=constraints_data.min_xsd_version[0] if constraints_data.min_xsd_version else None,
295+
max=constraints_data.max_xsd_version[0] if constraints_data.max_xsd_version else None
296+
)
297+
applicability_constraints = ApplicabilityConstraints(
298+
document_type_list=constraints_data.eforms_subtype,
299+
document_time_interval=document_time_interval,
300+
document_schema_version_list=constraints_data.min_xsd_version,
301+
document_version_range=version_range
276302
)
277303
else: # MetadataConstraintsEform
278-
eligibility_constraints = MappingPackageV2EligibilityConstraints(
279-
constraints=MappingPackageV2Constraints(
280-
eforms_subtype=constraints_data.eforms_subtype,
281-
start_date=constraints_data.start_date,
282-
end_date=constraints_data.end_date,
283-
eforms_sdk_versions=constraints_data.eforms_sdk_versions
284-
)
304+
applicability_constraints = ApplicabilityConstraints(
305+
document_type_list=constraints_data.eforms_subtype,
306+
document_time_interval=document_time_interval,
307+
document_schema_version_list=constraints_data.eforms_sdk_versions,
308+
document_version_range=None
285309
)
286310
else:
287311
# Default constraints
288-
eligibility_constraints = MappingPackageV2EligibilityConstraints(
289-
constraints=MappingPackageV2Constraints(
290-
eforms_subtype=["0"],
291-
start_date=None,
292-
end_date=None,
293-
eforms_sdk_versions=["unknown"]
294-
)
312+
applicability_constraints = ApplicabilityConstraints(
313+
document_type_list=["0"],
314+
document_time_interval=None,
315+
document_schema_version_list=["unknown"],
316+
document_version_range=None
295317
)
296318

297-
self.metadata = MappingPackageV2Metadata(
298-
path=Path("metadata.json"),
299-
identifier=self.identifier if self.identifier else "unknown",
300-
title=self.title if self.title else "Unknown Package",
301-
issue_date=self.created_at,
319+
# Parse created_at string to datetime for V3 metadata
320+
try:
321+
created_at_dt = datetime.fromisoformat(self.created_at) if self.created_at else datetime.now()
322+
except ValueError:
323+
created_at_dt = datetime.now()
324+
325+
self.metadata = MappingPackageV3MetadataJSONLD(
326+
path=Path("metadata.jsonld"),
327+
context="context.jsonld",
328+
id=self.identifier if self.identifier != "no_id" else "unknown",
329+
title=self.title if self.title != "no_title" else "Unknown Package",
330+
project_identifier=str(self.mapping_type) if self.mapping_type else "standard_forms",
331+
created_at=created_at_dt,
302332
description=f"Mapping package {self.identifier}",
303333
mapping_version=self.version,
304-
ontology_version=self.ontology_version,
305-
type=str(self.mapping_type) if self.mapping_type else "standard_forms",
306-
eligibility_constraints=eligibility_constraints,
307-
signature=self.mapping_suite_hash_digest if self.mapping_suite_hash_digest else ""
334+
model_version=self.ontology_version,
335+
applicability_constraints=applicability_constraints,
336+
mapping_suite_hash_digest=self.mapping_suite_hash_digest if self.mapping_suite_hash_digest else "",
337+
input_mime_types=["application/xml"],
338+
mssdk_version="3.0.0"
308339
)
309340

310341
def _populate_legacy_from_mssdk(self) -> None:
311-
"""Populate legacy pipeline fields from MSSDK v2 fields when needed."""
342+
"""Populate legacy pipeline fields from MSSDK v3 fields when needed."""
312343
if self.metadata:
313-
# Populate basic legacy fields from metadata
344+
# Populate basic legacy fields from V3 metadata
314345
# Check against default values since they are truthy strings
315346
if self.identifier == "no_id":
316-
self.identifier = self.metadata.identifier
347+
self.identifier = self.metadata.id
317348
if self.title == "no_title":
318349
self.title = self.metadata.title
319350
if not self.created_at:
320-
self.created_at = self.metadata.issue_date
351+
# V3 created_at is datetime, convert to ISO string
352+
self.created_at = self.metadata.created_at.isoformat() if self.metadata.created_at else ""
321353
if self.version == "0.1.1":
322354
self.version = self.metadata.mapping_version
323355
if self.ontology_version == "0.0.1":
324-
self.ontology_version = self.metadata.ontology_version
356+
self.ontology_version = self.metadata.model_version
325357
if not self.mapping_suite_hash_digest:
326-
self.mapping_suite_hash_digest = self.metadata.signature
327-
self.mapping_type = (
328-
MappingPackageType.ELECTRONIC_FORMS
329-
if self.metadata.type == "eforms"
330-
else MappingPackageType.STANDARD_FORMS
331-
)
332-
333-
# Populate metadata_constraints from eligibility_constraints
334-
constraints = self.metadata.eligibility_constraints.constraints
335-
if self.metadata.type == "eforms":
336-
self.metadata_constraints = MetadataConstraints(
337-
constraints=MetadataConstraintsEform(
338-
eforms_subtype=constraints.eforms_subtype,
339-
start_date=constraints.start_date,
340-
end_date=constraints.end_date,
341-
eforms_sdk_versions=constraints.eforms_sdk_versions
342-
)
343-
)
358+
self.mapping_suite_hash_digest = self.metadata.mapping_suite_hash_digest
359+
# Map project_identifier to mapping_type
360+
if self.metadata.project_identifier == "eforms":
361+
self.mapping_type = MappingPackageType.ELECTRONIC_FORMS
344362
else:
345-
self.metadata_constraints = MetadataConstraints(
346-
constraints=MetadataConstraintsStandardForm(
347-
eforms_subtype=constraints.eforms_subtype,
348-
start_date=constraints.start_date,
349-
end_date=constraints.end_date,
350-
min_xsd_version=constraints.eforms_sdk_versions,
351-
max_xsd_version=None
363+
self.mapping_type = MappingPackageType.STANDARD_FORMS
364+
365+
# Populate metadata_constraints from V3 applicability_constraints
366+
if self.metadata.applicability_constraints:
367+
constraints = self.metadata.applicability_constraints
368+
# Extract start/end dates from document_time_interval
369+
start_date = None
370+
end_date = None
371+
if constraints.document_time_interval:
372+
if constraints.document_time_interval.start:
373+
start_date = [constraints.document_time_interval.start.isoformat()]
374+
if constraints.document_time_interval.end:
375+
end_date = [constraints.document_time_interval.end.isoformat()]
376+
377+
# Populate constraints based on mapping type (project_identifier)
378+
if self.metadata.project_identifier == "eforms":
379+
self.metadata_constraints = MetadataConstraints(
380+
constraints=MetadataConstraintsEform(
381+
eforms_subtype=constraints.document_type_list,
382+
start_date=start_date,
383+
end_date=end_date,
384+
eforms_sdk_versions=constraints.document_schema_version_list or ["0.1"]
385+
)
386+
)
387+
else:
388+
# Standard forms style
389+
min_xsd = constraints.document_schema_version_list or ["0.1"]
390+
max_xsd = None
391+
if constraints.document_version_range:
392+
if constraints.document_version_range.max:
393+
max_xsd = [constraints.document_version_range.max]
394+
self.metadata_constraints = MetadataConstraints(
395+
constraints=MetadataConstraintsStandardForm(
396+
eforms_subtype=constraints.document_type_list,
397+
start_date=start_date,
398+
end_date=end_date,
399+
min_xsd_version=min_xsd,
400+
max_xsd_version=max_xsd
401+
)
352402
)
353-
)
354403

355404
# Populate transformation_rule_set from MSSDK v2 suites
356405
if not self.transformation_rule_set or not self.transformation_rule_set.rml_mapping_rules:

src/ted_sws/data_manager/adapters/mapping_package_repository.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,14 +280,18 @@ def _write_package_metadata(self, mapping_package: MappingPackage):
280280
:return:
281281
"""
282282
import base64
283+
from datetime import datetime
283284

284285
def convert_for_json(obj):
285-
"""Convert non-JSON-serializable objects (Path, bytes) to serializable form."""
286+
"""Convert non-JSON-serializable objects (Path, bytes, datetime) to serializable form."""
286287
if isinstance(obj, pathlib.Path):
287288
return str(obj)
288289
elif isinstance(obj, bytes):
289290
# Convert bytes to base64 string for JSON serialization
290291
return base64.b64encode(obj).decode('utf-8')
292+
elif isinstance(obj, datetime):
293+
# Convert datetime to ISO format string
294+
return obj.isoformat()
291295
elif isinstance(obj, dict):
292296
return {k: convert_for_json(v) for k, v in obj.items()}
293297
elif isinstance(obj, list):

0 commit comments

Comments
 (0)