Skip to content

Commit 70dcca7

Browse files
authored
Merge pull request #23 from meaningfy-ws/feature/TED9-166_extend-github-download-packages
[TEDSWS-232] Breaking: Transition to MSSDK for package loading/saving
2 parents 1a3e707 + 76dc968 commit 70dcca7

31 files changed

Lines changed: 736 additions & 517 deletions

File tree

src/ted_sws/core/model/transform.py

Lines changed: 445 additions & 49 deletions
Large diffs are not rendered by default.

src/ted_sws/data_manager/adapters/mapping_package_repository.py

Lines changed: 118 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,16 @@
22
import os
33
import pathlib
44
import shutil
5-
from datetime import datetime
65
from typing import Iterator, List, Optional
76

87
from pymongo import MongoClient
98

9+
from mapping_suite_sdk.core.adapters.repository import MongoDBRepository, ModelNotFoundError
10+
1011
from src.ted_sws import config
1112
from src.ted_sws.core.model.transform import MappingPackage, FileResource, TransformationRuleSet, SHACLTestSuite, \
1213
SPARQLTestSuite, MetadataConstraints, TransformationTestData, MappingPackageType, \
1314
MetadataConstraintsStandardForm, MetadataConstraintsEform
14-
from src.ted_sws.data_manager.adapters import inject_date_string_fields, remove_date_string_fields
1515
from src.ted_sws.data_manager.adapters.repository_abc import MappingPackageRepositoryABC
1616

1717
MS_METADATA_FILE_NAME = "metadata.json"
@@ -41,88 +41,106 @@
4141

4242

4343
class MappingPackageRepositoryMongoDB(MappingPackageRepositoryABC):
44-
"""
45-
This repository is intended for storing MappingPackage objects in MongoDB.
44+
"""This repository is intended for storing MappingPackage objects in MongoDB with MSSDK models.
45+
46+
Provides unified interface for CRUD operations on mapping packages
47+
of different versions (V1, V2, V3, V3Lightweight).
4648
"""
4749

4850
_collection_name = "mapping_package_collection"
4951

5052
def __init__(self, mongodb_client: MongoClient, database_name: str = None):
51-
"""
53+
"""Initialize the repository.
5254
53-
:param mongodb_client:
54-
:param database_name:
55+
Args:
56+
mongodb_client: MongoDB client instance
57+
database_name: Database name (defaults to config value)
5558
"""
56-
mongodb_client = mongodb_client
57-
self._database_name = database_name or config.MONGO_DB_AGGREGATES_DATABASE_NAME
58-
notice_db = mongodb_client[self._database_name]
59-
self.collection = notice_db[self._collection_name]
59+
self.database_name = database_name or config.MONGO_DB_AGGREGATES_DATABASE_NAME
60+
self.mongodb_client = mongodb_client
6061

61-
def _create_dict_from_mapping_package(self, mapping_package: MappingPackage) -> dict:
62-
"""
63-
This method create a dict from mapping package object.
64-
:param mapping_package:
65-
:return:
66-
"""
67-
mapping_package_dict = mapping_package.model_dump()
68-
mapping_package_dict[MONGODB_COLLECTION_ID] = mapping_package.get_mongodb_id()
69-
mapping_package_dict[MS_CREATED_AT_KEY] = datetime.fromisoformat(mapping_package_dict[MS_CREATED_AT_KEY])
70-
inject_date_string_fields(data=mapping_package_dict, date_field_name=MS_CREATED_AT_KEY)
71-
return mapping_package_dict
62+
def _get_repository(self, package: MappingPackage) -> MongoDBRepository:
63+
return MongoDBRepository(
64+
model_class=type(package),
65+
mongo_client=self.mongodb_client,
66+
database_name=self.database_name,
67+
collection_name=self._collection_name
68+
)
7269

73-
def _create_mapping_package_from_dict(self, mapping_package_dict: dict) -> Optional[MappingPackage]:
74-
"""
75-
This method create a mapping package object from a dictionary.
76-
:param mapping_package_dict:
77-
:return:
78-
"""
79-
if mapping_package_dict:
80-
mapping_package_dict.pop(MONGODB_COLLECTION_ID, None)
81-
mapping_package_dict[MS_CREATED_AT_KEY] = mapping_package_dict[MS_CREATED_AT_KEY].isoformat()
82-
remove_date_string_fields(data=mapping_package_dict, date_field_name=MS_CREATED_AT_KEY)
83-
return MappingPackage(**mapping_package_dict)
84-
return None
70+
def get_repository_by_class(self, package_class):
71+
return MongoDBRepository(
72+
model_class=package_class,
73+
mongo_client=self.mongodb_client,
74+
database_name=self.database_name,
75+
collection_name=self._collection_name
76+
)
8577

86-
def add(self, mapping_package: MappingPackage):
87-
"""
88-
This method allows you to add MappingPackage objects to the repository.
89-
:param mapping_package:
90-
:return:
91-
"""
92-
mapping_package_dict = self._create_dict_from_mapping_package(mapping_package=mapping_package)
93-
mapping_package_exist = self.collection.find_one(
94-
{MONGODB_COLLECTION_ID: mapping_package_dict[MONGODB_COLLECTION_ID]})
95-
if mapping_package_exist is None:
96-
self.collection.insert_one(mapping_package_dict)
78+
def add(self, mapping_package: MappingPackage) -> MappingPackage:
79+
"""Save a mapping package to MongoDB.
9780
98-
def update(self, mapping_package: MappingPackage):
99-
"""
100-
This method allows you to update MappingPackage objects to the repository
101-
:param mapping_package:
102-
:return:
81+
Args:
82+
mapping_package: The mapping package (legacy or MSSDK model)
83+
84+
Returns:
85+
The saved package
10386
"""
104-
mapping_package_dict = self._create_dict_from_mapping_package(mapping_package=mapping_package)
105-
self.collection.update_one({MONGODB_COLLECTION_ID: mapping_package_dict[MONGODB_COLLECTION_ID]},
106-
{"$set": mapping_package_dict})
87+
repo = self._get_repository(mapping_package)
88+
return repo.create(mapping_package)
10789

108-
def get(self, reference) -> MappingPackage:
90+
def get(self, reference: str, package_class: type = MappingPackage) -> MappingPackage:
91+
"""Retrieve a mapping package from MongoDB.
92+
93+
Args:
94+
reference: The package identifier
95+
package_class: The expected package model class (defaults to MappingPackage)
96+
97+
Returns:
98+
The retrieved package
99+
100+
Raises:
101+
ModelNotFoundError: If package not found
109102
"""
110-
This method allows a MappingPackage to be obtained based on an identification reference.
111-
:param reference:
112-
:return: MappingPackage
103+
repo = self.get_repository_by_class(package_class)
104+
return repo.read(reference)
105+
106+
def update(self, mapping_package: MappingPackage) -> MappingPackage:
107+
"""Update a mapping package in MongoDB.
108+
109+
Args:
110+
mapping_package: The package to update
111+
112+
Returns:
113+
The updated package
113114
"""
114-
result_dict = self.collection.find_one({MONGODB_COLLECTION_ID: reference})
115-
return self._create_mapping_package_from_dict(mapping_package_dict=result_dict)
115+
repo = self._get_repository(mapping_package)
116+
return repo.update(mapping_package)
116117

117-
def list(self) -> Iterator[MappingPackage]:
118+
def delete(self, reference: str) -> None:
119+
"""Delete a mapping package from MongoDB.
120+
121+
Args:
122+
reference: The package identifier
118123
"""
119-
This method allows all records to be retrieved from the repository.
120-
:return: list of MappingPackages
124+
db = self.mongodb_client[self.database_name]
125+
collection = db[self._collection_name]
126+
result = collection.delete_one({'_id': reference})
127+
if result.deleted_count < 1:
128+
raise ModelNotFoundError(f"Package with ID {reference} not found")
129+
130+
def list(self, package_class: type = MappingPackage) -> List[MappingPackage]:
131+
"""List mapping packages from MongoDB.
132+
133+
Args:
134+
package_class: The package model class to retrieve (defaults to MappingPackage)
135+
136+
Returns:
137+
List of packages
121138
"""
122-
for result_dict in self.collection.find():
123-
yield self._create_mapping_package_from_dict(mapping_package_dict=result_dict)
139+
repo = self.get_repository_by_class(package_class)
140+
return repo.read_many()
124141

125142

143+
# DEPRECATED - use MSSDK for reading from and writing to the file system; remove once all code, especially tests, is updated
126144
class MappingPackageRepositoryInFileSystem(MappingPackageRepositoryABC):
127145
"""
128146
This repository is intended for storing MappingPackage objects in FileSystem.
@@ -212,12 +230,42 @@ def _write_package_metadata(self, mapping_package: MappingPackage):
212230
:param mapping_package:
213231
:return:
214232
"""
233+
import base64
234+
from datetime import datetime
235+
236+
def convert_for_json(obj):
237+
"""Convert non-JSON-serializable objects (Path, bytes, datetime) to serializable form."""
238+
if isinstance(obj, pathlib.Path):
239+
return str(obj)
240+
elif isinstance(obj, bytes):
241+
# Convert bytes to base64 string for JSON serialization
242+
return base64.b64encode(obj).decode('utf-8')
243+
elif isinstance(obj, datetime):
244+
# Convert datetime to ISO format string
245+
return obj.isoformat()
246+
elif isinstance(obj, dict):
247+
return {k: convert_for_json(v) for k, v in obj.items()}
248+
elif isinstance(obj, list):
249+
return [convert_for_json(i) for i in obj]
250+
elif isinstance(obj, tuple):
251+
return tuple(convert_for_json(i) for i in obj)
252+
else:
253+
return obj
254+
215255
package_path = self.repository_path / mapping_package.identifier
216256
package_path.mkdir(parents=True, exist_ok=True)
217257
metadata_path = package_path / MS_METADATA_FILE_NAME
218258
package_metadata = mapping_package.model_dump()
219-
[package_metadata.pop(key, None) for key in
220-
["transformation_rule_set", "shacl_test_suites", "sparql_test_suites"]]
259+
# Exclude legacy fields (written separately) and MSSDK collection asset fields (contain file content)
260+
fields_to_exclude = [
261+
"transformation_rule_set", "shacl_test_suites", "sparql_test_suites", # Legacy fields
262+
"technical_mapping_suite", "vocabulary_mapping_suite", # MSSDK - written separately
263+
"conceptual_mapping_asset", # MSSDK - bytes content (xlsx)
264+
"test_data_suites", "test_suites_sparql", "test_suites_shacl", "test_results", # MSSDK test suites
265+
]
266+
for key in fields_to_exclude:
267+
package_metadata.pop(key, None)
268+
package_metadata = convert_for_json(package_metadata)
221269
with metadata_path.open("w", encoding="utf-8") as f:
222270
f.write(json.dumps(package_metadata))
223271

@@ -285,6 +333,8 @@ def _write_package_transform_rules(self, mapping_package: MappingPackage):
285333
:param mapping_package:
286334
:return:
287335
"""
336+
if mapping_package.transformation_rule_set is None:
337+
return
288338
package_path = self.repository_path / mapping_package.identifier
289339
transform_path = package_path / MS_TRANSFORM_FOLDER_NAME
290340
mappings_path = transform_path / MS_MAPPINGS_FOLDER_NAME
@@ -332,6 +382,8 @@ def _write_test_data_package(self, mapping_package: MappingPackage):
332382
:param mapping_package:
333383
:return:
334384
"""
385+
if mapping_package.transformation_test_data is None:
386+
return
335387
package_path = self.repository_path / mapping_package.identifier
336388
test_data_path = package_path / MS_TEST_DATA_FOLDER_NAME
337389
test_data_path.mkdir(parents=True, exist_ok=True)

src/ted_sws/mapping_suite_processor/adapters/github_ms_project_downloader.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import tempfile
66
from typing import ClassVar
77
from src.ted_sws import config
8-
8+
from src.ted_sws.event_manager.services.log import log_technical_info
99
# TODO: get from env or config
1010
MAPPINGS_DIR_NAME = "mappings"
1111
MS_CONFIG_DIR_NAME = "config"
@@ -76,12 +76,16 @@ def get_git_head_hash(git_repository_path: pathlib.Path) -> str:
7676

7777
with tempfile.TemporaryDirectory() as tmp_dir:
7878
temp_dir_path = pathlib.Path(tmp_dir)
79-
bash_script = f"cd {temp_dir_path} && git clone --branch {self.branch_or_tag_name} {self.github_repository_url}"
80-
subprocess.run(bash_script, shell=True,
81-
stdout=subprocess.DEVNULL,
82-
stderr=subprocess.STDOUT)
79+
bash_script = f"cd {temp_dir_path} && git clone --depth 1 --branch {self.branch_or_tag_name} {self.github_repository_url}"
80+
result = subprocess.run(bash_script, shell=True,
81+
capture_output=True, text=True)
82+
log_technical_info(
83+
message=f"Downloaded stdout '{result.stdout}'")
84+
log_technical_info(
85+
message=f"Downloaded stderr '{result.stderr}'")
8386
git_last_commit_hash = get_git_head_hash(
8487
git_repository_path=temp_dir_path / self.repository_name)
8588
downloaded_tmp_project_path = temp_dir_path / self.repository_name
8689
shutil.copytree(downloaded_tmp_project_path, output_project_path, dirs_exist_ok=True)
90+
8791
return git_last_commit_hash

0 commit comments

Comments
 (0)