|
2 | 2 | import os |
3 | 3 | import pathlib |
4 | 4 | import shutil |
5 | | -from datetime import datetime |
6 | 5 | from typing import Iterator, List, Optional |
7 | 6 |
|
8 | 7 | from pymongo import MongoClient |
9 | 8 |
|
| 9 | +from mapping_suite_sdk.core.adapters.repository import MongoDBRepository, ModelNotFoundError |
| 10 | + |
10 | 11 | from src.ted_sws import config |
11 | 12 | from src.ted_sws.core.model.transform import MappingPackage, FileResource, TransformationRuleSet, SHACLTestSuite, \ |
12 | 13 | SPARQLTestSuite, MetadataConstraints, TransformationTestData, MappingPackageType, \ |
13 | 14 | MetadataConstraintsStandardForm, MetadataConstraintsEform |
14 | | -from src.ted_sws.data_manager.adapters import inject_date_string_fields, remove_date_string_fields |
15 | 15 | from src.ted_sws.data_manager.adapters.repository_abc import MappingPackageRepositoryABC |
16 | 16 |
|
17 | 17 | MS_METADATA_FILE_NAME = "metadata.json" |
|
41 | 41 |
|
42 | 42 |
|
43 | 43 | class MappingPackageRepositoryMongoDB(MappingPackageRepositoryABC): |
44 | | - """ |
45 | | - This repository is intended for storing MappingPackage objects in MongoDB. |
| 44 | + """This repository is intended for storing MappingPackage objects in MongoDB with MSSDK models. |
| 45 | +
|
| 46 | + Provides unified interface for CRUD operations on mapping packages |
| 47 | + of different versions (V1, V2, V3, V3Lightweight). |
46 | 48 | """ |
47 | 49 |
|
48 | 50 | _collection_name = "mapping_package_collection" |
49 | 51 |
|
50 | 52 | def __init__(self, mongodb_client: MongoClient, database_name: str = None): |
51 | | - """ |
| 53 | + """Initialize the repository. |
52 | 54 |
|
53 | | - :param mongodb_client: |
54 | | - :param database_name: |
| 55 | + Args: |
| 56 | + mongodb_client: MongoDB client instance |
| 57 | + database_name: Database name (defaults to config value) |
55 | 58 | """ |
56 | | - mongodb_client = mongodb_client |
57 | | - self._database_name = database_name or config.MONGO_DB_AGGREGATES_DATABASE_NAME |
58 | | - notice_db = mongodb_client[self._database_name] |
59 | | - self.collection = notice_db[self._collection_name] |
| 59 | + self.database_name = database_name or config.MONGO_DB_AGGREGATES_DATABASE_NAME |
| 60 | + self.mongodb_client = mongodb_client |
60 | 61 |
|
61 | | - def _create_dict_from_mapping_package(self, mapping_package: MappingPackage) -> dict: |
62 | | - """ |
63 | | - This method create a dict from mapping package object. |
64 | | - :param mapping_package: |
65 | | - :return: |
66 | | - """ |
67 | | - mapping_package_dict = mapping_package.model_dump() |
68 | | - mapping_package_dict[MONGODB_COLLECTION_ID] = mapping_package.get_mongodb_id() |
69 | | - mapping_package_dict[MS_CREATED_AT_KEY] = datetime.fromisoformat(mapping_package_dict[MS_CREATED_AT_KEY]) |
70 | | - inject_date_string_fields(data=mapping_package_dict, date_field_name=MS_CREATED_AT_KEY) |
71 | | - return mapping_package_dict |
| 62 | + def _get_repository(self, package: MappingPackage) -> MongoDBRepository: |
| 63 | + return MongoDBRepository( |
| 64 | + model_class=type(package), |
| 65 | + mongo_client=self.mongodb_client, |
| 66 | + database_name=self.database_name, |
| 67 | + collection_name=self._collection_name |
| 68 | + ) |
72 | 69 |
|
73 | | - def _create_mapping_package_from_dict(self, mapping_package_dict: dict) -> Optional[MappingPackage]: |
74 | | - """ |
75 | | - This method create a mapping package object from a dictionary. |
76 | | - :param mapping_package_dict: |
77 | | - :return: |
78 | | - """ |
79 | | - if mapping_package_dict: |
80 | | - mapping_package_dict.pop(MONGODB_COLLECTION_ID, None) |
81 | | - mapping_package_dict[MS_CREATED_AT_KEY] = mapping_package_dict[MS_CREATED_AT_KEY].isoformat() |
82 | | - remove_date_string_fields(data=mapping_package_dict, date_field_name=MS_CREATED_AT_KEY) |
83 | | - return MappingPackage(**mapping_package_dict) |
84 | | - return None |
| 70 | + def get_repository_by_class(self, package_class): |
| 71 | + return MongoDBRepository( |
| 72 | + model_class=package_class, |
| 73 | + mongo_client=self.mongodb_client, |
| 74 | + database_name=self.database_name, |
| 75 | + collection_name=self._collection_name |
| 76 | + ) |
85 | 77 |
|
86 | | - def add(self, mapping_package: MappingPackage): |
87 | | - """ |
88 | | - This method allows you to add MappingPackage objects to the repository. |
89 | | - :param mapping_package: |
90 | | - :return: |
91 | | - """ |
92 | | - mapping_package_dict = self._create_dict_from_mapping_package(mapping_package=mapping_package) |
93 | | - mapping_package_exist = self.collection.find_one( |
94 | | - {MONGODB_COLLECTION_ID: mapping_package_dict[MONGODB_COLLECTION_ID]}) |
95 | | - if mapping_package_exist is None: |
96 | | - self.collection.insert_one(mapping_package_dict) |
| 78 | + def add(self, mapping_package: MappingPackage) -> MappingPackage: |
| 79 | + """Save a mapping package to MongoDB. |
97 | 80 |
|
98 | | - def update(self, mapping_package: MappingPackage): |
99 | | - """ |
100 | | - This method allows you to update MappingPackage objects to the repository |
101 | | - :param mapping_package: |
102 | | - :return: |
| 81 | + Args: |
| 82 | + mapping_package: The mapping package (legacy or MSSDK model) |
| 83 | +
|
| 84 | + Returns: |
| 85 | + The saved package |
103 | 86 | """ |
104 | | - mapping_package_dict = self._create_dict_from_mapping_package(mapping_package=mapping_package) |
105 | | - self.collection.update_one({MONGODB_COLLECTION_ID: mapping_package_dict[MONGODB_COLLECTION_ID]}, |
106 | | - {"$set": mapping_package_dict}) |
| 87 | + repo = self._get_repository(mapping_package) |
| 88 | + return repo.create(mapping_package) |
107 | 89 |
|
108 | | - def get(self, reference) -> MappingPackage: |
| 90 | + def get(self, reference: str, package_class: type = MappingPackage) -> MappingPackage: |
| 91 | + """Retrieve a mapping package from MongoDB. |
| 92 | +
|
| 93 | + Args: |
| 94 | + reference: The package identifier |
| 95 | + package_class: The expected package model class (defaults to MappingPackage) |
| 96 | +
|
| 97 | + Returns: |
| 98 | + The retrieved package |
| 99 | +
|
| 100 | + Raises: |
| 101 | + ModelNotFoundError: If package not found |
109 | 102 | """ |
110 | | - This method allows a MappingPackage to be obtained based on an identification reference. |
111 | | - :param reference: |
112 | | - :return: MappingPackage |
| 103 | + repo = self.get_repository_by_class(package_class) |
| 104 | + return repo.read(reference) |
| 105 | + |
| 106 | + def update(self, mapping_package: MappingPackage) -> MappingPackage: |
| 107 | + """Update a mapping package in MongoDB. |
| 108 | +
|
| 109 | + Args: |
| 110 | + mapping_package: The package to update |
| 111 | +
|
| 112 | + Returns: |
| 113 | + The updated package |
113 | 114 | """ |
114 | | - result_dict = self.collection.find_one({MONGODB_COLLECTION_ID: reference}) |
115 | | - return self._create_mapping_package_from_dict(mapping_package_dict=result_dict) |
| 115 | + repo = self._get_repository(mapping_package) |
| 116 | + return repo.update(mapping_package) |
116 | 117 |
|
117 | | - def list(self) -> Iterator[MappingPackage]: |
| 118 | + def delete(self, reference: str) -> None: |
| 119 | + """Delete a mapping package from MongoDB. |
| 120 | +
|
| 121 | + Args: |
| 122 | + reference: The package identifier |
118 | 123 | """ |
119 | | - This method allows all records to be retrieved from the repository. |
120 | | - :return: list of MappingPackages |
| 124 | + db = self.mongodb_client[self.database_name] |
| 125 | + collection = db[self._collection_name] |
| 126 | + result = collection.delete_one({'_id': reference}) |
| 127 | + if result.deleted_count < 1: |
| 128 | + raise ModelNotFoundError(f"Package with ID {reference} not found") |
| 129 | + |
| 130 | + def list(self, package_class: type = MappingPackage) -> List[MappingPackage]: |
| 131 | + """List mapping packages from MongoDB. |
| 132 | +
|
| 133 | + Args: |
| 134 | + package_class: The package model class to retrieve (defaults to V2) |
| 135 | +
|
| 136 | + Returns: |
| 137 | + List of packages |
121 | 138 | """ |
122 | | - for result_dict in self.collection.find(): |
123 | | - yield self._create_mapping_package_from_dict(mapping_package_dict=result_dict) |
| 139 | + repo = self.get_repository_by_class(package_class) |
| 140 | + return repo.read_many() |
124 | 141 |
|
125 | 142 |
|
| 143 | +# DEPRECATED - use MSSDK for reading and writing to FS, remove once all code especially tests are updated |
126 | 144 | class MappingPackageRepositoryInFileSystem(MappingPackageRepositoryABC): |
127 | 145 | """ |
128 | 146 | This repository is intended for storing MappingPackage objects in FileSystem. |
@@ -212,12 +230,42 @@ def _write_package_metadata(self, mapping_package: MappingPackage): |
212 | 230 | :param mapping_package: |
213 | 231 | :return: |
214 | 232 | """ |
| 233 | + import base64 |
| 234 | + from datetime import datetime |
| 235 | + |
| 236 | + def convert_for_json(obj): |
| 237 | + """Convert non-JSON-serializable objects (Path, bytes, datetime) to serializable form.""" |
| 238 | + if isinstance(obj, pathlib.Path): |
| 239 | + return str(obj) |
| 240 | + elif isinstance(obj, bytes): |
| 241 | + # Convert bytes to base64 string for JSON serialization |
| 242 | + return base64.b64encode(obj).decode('utf-8') |
| 243 | + elif isinstance(obj, datetime): |
| 244 | + # Convert datetime to ISO format string |
| 245 | + return obj.isoformat() |
| 246 | + elif isinstance(obj, dict): |
| 247 | + return {k: convert_for_json(v) for k, v in obj.items()} |
| 248 | + elif isinstance(obj, list): |
| 249 | + return [convert_for_json(i) for i in obj] |
| 250 | + elif isinstance(obj, tuple): |
| 251 | + return tuple(convert_for_json(i) for i in obj) |
| 252 | + else: |
| 253 | + return obj |
| 254 | + |
215 | 255 | package_path = self.repository_path / mapping_package.identifier |
216 | 256 | package_path.mkdir(parents=True, exist_ok=True) |
217 | 257 | metadata_path = package_path / MS_METADATA_FILE_NAME |
218 | 258 | package_metadata = mapping_package.model_dump() |
219 | | - [package_metadata.pop(key, None) for key in |
220 | | - ["transformation_rule_set", "shacl_test_suites", "sparql_test_suites"]] |
| 259 | + # Exclude legacy fields (written separately) and MSSDK collection asset fields (contain file content) |
| 260 | + fields_to_exclude = [ |
| 261 | + "transformation_rule_set", "shacl_test_suites", "sparql_test_suites", # Legacy fields |
| 262 | + "technical_mapping_suite", "vocabulary_mapping_suite", # MSSDK - written separately |
| 263 | + "conceptual_mapping_asset", # MSSDK - bytes content (xlsx) |
| 264 | + "test_data_suites", "test_suites_sparql", "test_suites_shacl", "test_results", # MSSDK test suites |
| 265 | + ] |
| 266 | + for key in fields_to_exclude: |
| 267 | + package_metadata.pop(key, None) |
| 268 | + package_metadata = convert_for_json(package_metadata) |
221 | 269 | with metadata_path.open("w", encoding="utf-8") as f: |
222 | 270 | f.write(json.dumps(package_metadata)) |
223 | 271 |
|
@@ -285,6 +333,8 @@ def _write_package_transform_rules(self, mapping_package: MappingPackage): |
285 | 333 | :param mapping_package: |
286 | 334 | :return: |
287 | 335 | """ |
| 336 | + if mapping_package.transformation_rule_set is None: |
| 337 | + return |
288 | 338 | package_path = self.repository_path / mapping_package.identifier |
289 | 339 | transform_path = package_path / MS_TRANSFORM_FOLDER_NAME |
290 | 340 | mappings_path = transform_path / MS_MAPPINGS_FOLDER_NAME |
@@ -332,6 +382,8 @@ def _write_test_data_package(self, mapping_package: MappingPackage): |
332 | 382 | :param mapping_package: |
333 | 383 | :return: |
334 | 384 | """ |
| 385 | + if mapping_package.transformation_test_data is None: |
| 386 | + return |
335 | 387 | package_path = self.repository_path / mapping_package.identifier |
336 | 388 | test_data_path = package_path / MS_TEST_DATA_FOLDER_NAME |
337 | 389 | test_data_path.mkdir(parents=True, exist_ok=True) |
|
0 commit comments