55# Author: Eugeniu Costetchi
66# Email: costezki.eugen@gmail.com
77
8- """ """
98import abc
109from datetime import datetime
1110from enum import Enum
11+ from pathlib import Path
1212from typing import List , Optional , Union
1313
14- from pydantic import field_validator , ConfigDict , Field
14+ from pydantic import field_validator , ConfigDict , Field , model_validator
1515
1616from src .ted_sws .core .model import PropertyBaseModel
1717
1818from mapping_suite_sdk .mapping_package_v2 .models import MappingPackageV2
19- from mapping_suite_sdk .core .models .collection_asset import TestDataCollectionAsset , SPARQLTestCollectionAsset , SHACLTestCollectionAsset , \
20- TestResultCollectionAsset
19+ from mapping_suite_sdk .core .models .collection_asset import (
20+ TestDataCollectionAsset ,
21+ SPARQLTestCollectionAsset ,
22+ SHACLTestCollectionAsset ,
23+ TestResultCollectionAsset ,
24+ TechnicalMappingCollectionAsset ,
25+ VocabularyMappingCollectionAsset ,
26+ )
27+ from mapping_suite_sdk .core .models .file_asset import (
28+ RMLMappingFileAsset ,
29+ VocabularyMappingFileAsset ,
30+ )
31+ from mapping_suite_sdk .mapping_package_v2 .models .mapping_package_v2_metadata import (
32+ MappingPackageV2Metadata ,
33+ MappingPackageV2Constraints ,
34+ MappingPackageV2EligibilityConstraints ,
35+ )
2136
2237class MappingPackageComponent (PropertyBaseModel , abc .ABC ):
2338 model_config = ConfigDict (validate_assignment = True )
@@ -107,12 +122,27 @@ def __str__(self):
107122class MappingPackage (MappingPackageComponent , MappingPackageV2 ):
108123 """
109124 Extended mapping package model that inherits from an MSSDK model.
110-
125+
111126 Combines compatibility with MSSDK version 2 while adding legacy pipeline-specific fields.
112-
127+
113128 IMPORTANT: Many legacy fields are optional with defaults to avoid conflicts with MSSDK models.
114129 """
115-
130+
131+ # Override MSSDK v2 required fields to make them optional (will be auto-populated)
132+ # this is mostly a transitional solution for tests; packages are loaded and validated with pure MSSDK models first
133+ technical_mapping_suite : Optional [TechnicalMappingCollectionAsset ] = Field (
134+ default = None ,
135+ description = "RML mapping files containing the technical mapping rules/definitions"
136+ )
137+ vocabulary_mapping_suite : Optional [VocabularyMappingCollectionAsset ] = Field (
138+ default = None ,
139+ description = "Vocabulary resources used by mapping rules in XML, JSON or CSV format"
140+ )
141+ metadata : Optional [MappingPackageV2Metadata ] = Field (
142+ default = None ,
143+ description = "Package metadata containing general information"
144+ )
145+
116146 # Legacy pipeline-specific fields - MOSTLY OPTIONAL
117147 created_at : str = Field (
118148 default_factory = lambda : datetime .now ().replace (microsecond = 0 ).isoformat ()
@@ -134,7 +164,7 @@ class MappingPackage(MappingPackageComponent, MappingPackageV2):
134164 # TODO fix to be forwarded to MSSDK, remove when implemented there
135165 # Override large/optional collection assets in MSSDK model
136166 test_results : Optional [TestResultCollectionAsset ] = Field (
137- default = None ,
167+ default = None ,
138168 description = "Collections of test transformation results (optional due to large storage requirements -- will cause MongoDB BSON error for 16MB limit)"
139169 )
140170 test_data_suites : List [TestDataCollectionAsset ] = Field (
@@ -150,6 +180,196 @@ class MappingPackage(MappingPackageComponent, MappingPackageV2):
150180 description = "Collections of SHACL-based validation test suites"
151181 )
152182
@model_validator(mode='after')
def sync_legacy_and_mssdk_fields(self) -> 'MappingPackage':
    """
    Keep the legacy pipeline fields and the MSSDK v2 fields in step.

    Registered as a pydantic ``after`` model validator. Exactly one of
    two directions is taken, decided by the state of ``metadata``:

    * ``metadata`` is ``None``: the MSSDK v2 fields are derived from the
      legacy fields via ``_populate_mssdk_from_legacy``.
    * ``metadata`` is set and ``identifier`` still holds the ``"no_id"``
      placeholder: the legacy fields are derived from the MSSDK v2 fields
      via ``_populate_legacy_from_mssdk``.

    In every other case the instance is left as it is.

    FIXME: transitional solution for code paths where the legacy file
    system package parsing is still used.

    :return: this same instance, as required of an ``after`` validator
    """
    if self.metadata is not None:
        # MSSDK v2 data is available; back-fill legacy fields only when
        # the identifier is still the legacy placeholder.
        if self.identifier == "no_id":
            self._populate_legacy_from_mssdk()
        return self

    # No MSSDK v2 metadata yet: derive it (and the suites) from legacy data.
    self._populate_mssdk_from_legacy()
    return self
204+
def _populate_mssdk_from_legacy(self) -> None:
    """
    Populate the MSSDK v2 fields from the legacy pipeline fields.

    Each of ``technical_mapping_suite``, ``vocabulary_mapping_suite`` and
    ``metadata`` is filled in only while it is ``None``; values already
    present are left untouched.

    * ``technical_mapping_suite`` is built from
      ``transformation_rule_set.rml_mapping_rules``; when there are none, a
      single placeholder RML file is used instead.
    * ``vocabulary_mapping_suite`` is built from
      ``transformation_rule_set.resources``; when there are none, a single
      placeholder vocabulary file is used instead.
    * ``metadata`` is built from the legacy scalar fields and
      ``metadata_constraints``; default constraints are used when
      ``metadata_constraints`` is not set.

    NOTE(review): ``MappingPackageComponent`` sets
    ``validate_assignment=True``; if that configuration is inherited here,
    each assignment below presumably re-runs the model validator. The
    assignment order (suites first, ``metadata`` last) is therefore kept
    exactly as it was -- confirm before reordering.
    """
    # --- technical_mapping_suite <- transformation_rule_set.rml_mapping_rules
    if self.technical_mapping_suite is None:
        rule_set = self.transformation_rule_set
        if rule_set and rule_set.rml_mapping_rules:
            rml_files = [
                RMLMappingFileAsset(
                    path=Path(f"transformation/mappings/{rule.file_name}"),
                    content=rule.file_content,
                )
                for rule in rule_set.rml_mapping_rules
            ]
        else:
            # Minimal dummy data to satisfy MSSDK v2 requirements.
            rml_files = [
                RMLMappingFileAsset(
                    path=Path("transformation/mappings/mapping.rml.ttl"),
                    content="# Placeholder RML mapping",
                )
            ]
        self.technical_mapping_suite = TechnicalMappingCollectionAsset(
            path=Path("transformation/mappings"),
            files=rml_files,
        )

    # --- vocabulary_mapping_suite <- transformation_rule_set.resources
    if self.vocabulary_mapping_suite is None:
        rule_set = self.transformation_rule_set
        if rule_set and rule_set.resources:
            vocabulary_files = [
                VocabularyMappingFileAsset(
                    path=Path(f"resources/{res.file_name}"),
                    content=res.file_content,
                )
                for res in rule_set.resources
            ]
        else:
            # Minimal dummy data to satisfy MSSDK v2 requirements.
            vocabulary_files = [
                VocabularyMappingFileAsset(
                    path=Path("resources/vocabulary.xml"),
                    content="<dummy>vocabulary content</dummy>",
                )
            ]
        self.vocabulary_mapping_suite = VocabularyMappingCollectionAsset(
            path=Path("resources"),
            files=vocabulary_files,
        )

    # --- metadata <- legacy scalar fields + metadata_constraints
    if self.metadata is None:
        if self.metadata_constraints:
            legacy_constraints = self.metadata_constraints.constraints
            # The two legacy constraint forms differ only in where the
            # version list lives: standard forms carry min_xsd_version,
            # eForms carry eforms_sdk_versions.
            if isinstance(legacy_constraints, MetadataConstraintsStandardForm):
                sdk_versions = legacy_constraints.min_xsd_version
            else:  # MetadataConstraintsEform
                sdk_versions = legacy_constraints.eforms_sdk_versions
            v2_constraints = MappingPackageV2Constraints(
                eforms_subtype=legacy_constraints.eforms_subtype,
                start_date=legacy_constraints.start_date,
                end_date=legacy_constraints.end_date,
                eforms_sdk_versions=sdk_versions,
            )
        else:
            # Default constraints
            v2_constraints = MappingPackageV2Constraints(
                eforms_subtype=["0"],
                start_date=None,
                end_date=None,
                eforms_sdk_versions=["unknown"],
            )
        eligibility_constraints = MappingPackageV2EligibilityConstraints(
            constraints=v2_constraints
        )

        # Serialise the mapping type by its *value*: str() on an Enum member
        # yields "ClassName.MEMBER", which would never match the "eforms"
        # comparison done in _populate_legacy_from_mssdk. getattr keeps this
        # working when mapping_type is already a plain string.
        legacy_type = self.mapping_type
        if legacy_type:
            package_type = str(getattr(legacy_type, "value", legacy_type))
        else:
            package_type = "standard_forms"

        self.metadata = MappingPackageV2Metadata(
            path=Path("metadata.json"),
            identifier=self.identifier if self.identifier != "no_id" else "unknown",
            title=self.title if self.title != "no_title" else "Unknown Package",
            issue_date=self.created_at,
            description=f"Mapping package {self.identifier}",
            mapping_version=self.version,
            ontology_version=self.ontology_version,
            type=package_type,
            eligibility_constraints=eligibility_constraints,
            signature=self.mapping_suite_hash_digest if self.mapping_suite_hash_digest else "",
        )
303+
def _populate_legacy_from_mssdk(self) -> None:
    """
    Populate the legacy pipeline fields from the MSSDK v2 fields.

    When ``metadata`` is set, the legacy scalar fields, ``mapping_type``
    and ``metadata_constraints`` are overwritten from it. Independently of
    that, ``transformation_rule_set`` is rebuilt from the MSSDK v2 suites
    whenever it is missing or carries no RML mapping rules.
    """
    meta = self.metadata
    if meta:
        # (legacy field, metadata attribute) pairs, copied in this order.
        scalar_pairs = (
            ("identifier", "identifier"),
            ("title", "title"),
            ("created_at", "issue_date"),
            ("version", "mapping_version"),
            ("ontology_version", "ontology_version"),
            ("mapping_suite_hash_digest", "signature"),
        )
        for legacy_field, metadata_field in scalar_pairs:
            setattr(self, legacy_field, getattr(meta, metadata_field))

        is_eforms = meta.type == "eforms"
        if is_eforms:
            self.mapping_type = MappingPackageType.ELECTRONIC_FORMS
        else:
            self.mapping_type = MappingPackageType.STANDARD_FORMS

        # Rebuild metadata_constraints from the eligibility constraints.
        v2_constraints = meta.eligibility_constraints.constraints
        if is_eforms:
            legacy_constraints = MetadataConstraintsEform(
                eforms_subtype=v2_constraints.eforms_subtype,
                start_date=v2_constraints.start_date,
                end_date=v2_constraints.end_date,
                eforms_sdk_versions=v2_constraints.eforms_sdk_versions
            )
        else:
            legacy_constraints = MetadataConstraintsStandardForm(
                eforms_subtype=v2_constraints.eforms_subtype,
                start_date=v2_constraints.start_date,
                end_date=v2_constraints.end_date,
                min_xsd_version=v2_constraints.eforms_sdk_versions,
                max_xsd_version=None
            )
        self.metadata_constraints = MetadataConstraints(constraints=legacy_constraints)

    # An existing rule set that already has RML mapping rules is kept as is.
    current_rule_set = self.transformation_rule_set
    if current_rule_set and current_rule_set.rml_mapping_rules:
        return

    def as_file_resources(suite):
        # One FileResource per file in the suite; an absent suite gives [].
        if not suite:
            return []
        return [
            FileResource(
                file_name=asset.path.name,
                file_content=asset.content,
                original_name=asset.path.name
            )
            for asset in suite.files
        ]

    rml_rules = as_file_resources(self.technical_mapping_suite)
    resources = as_file_resources(self.vocabulary_mapping_suite)
    self.transformation_rule_set = TransformationRuleSet(
        resources=resources,
        rml_mapping_rules=rml_rules
    )
372+
153373 # TODO check this out and remove if not needed (see if any production package ID does not come with version)
154374 def get_mongodb_id (self ) -> str :
155375 """Get MongoDB _id for this package."""
0 commit comments