Skip to content

Commit f596960

Browse files
fix bugs and add improvements
1 parent 66f7845 commit f596960

8 files changed

Lines changed: 106 additions & 40 deletions

File tree

ted_sws/core/model/lazy_object.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@ def load_lazy_field(self, source_object: Any, property_field: property) -> Any:
1313
:return:
1414
"""
1515

16+
@abc.abstractmethod
17+
def remove_lazy_field(self, source_object: Any, property_field: property):
18+
"""
19+
20+
:param source_object:
21+
:param property_field:
22+
:return:
23+
"""
24+
1625

1726
class LazyObjectABC(abc.ABC):
1827

@@ -41,3 +50,13 @@ def load_lazy_field(self, property_field: property):
4150
return self.get_lazy_object_fields_loader().load_lazy_field(source_object=self,
4251
property_field=property_field)
4352
return None
53+
54+
def remove_lazy_field(self, property_field: property):
55+
"""
56+
57+
:param property_field:
58+
:return:
59+
"""
60+
if self.get_lazy_object_fields_loader():
61+
self.get_lazy_object_fields_loader().remove_lazy_field(source_object=self,
62+
property_field=property_field)

ted_sws/core/model/notice.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,8 @@ def _check_status_is_validated(self) -> bool:
338338
339339
:return:
340340
"""
341-
if self._rdf_manifestation and self._distilled_rdf_manifestation and self.xml_manifestation:
342-
if self._distilled_rdf_manifestation.is_validated() and self.xml_manifestation.is_validated():
341+
if self.rdf_manifestation and self.distilled_rdf_manifestation and self.xml_manifestation:
342+
if self.distilled_rdf_manifestation.is_validated() and self.xml_manifestation.is_validated():
343343
return True
344344
return False
345345

@@ -354,7 +354,7 @@ def set_rdf_validation(self, rdf_validation: Union[SPARQLTestSuiteValidationRepo
354354
if not self.rdf_manifestation:
355355
raise ValueError("Cannot set the RDF validation of a non-existent RDF manifestation")
356356

357-
self._rdf_manifestation.add_validation(validation=rdf_validation)
357+
self.rdf_manifestation.add_validation(validation=rdf_validation)
358358

359359
def set_distilled_rdf_validation(self, rdf_validation: Union[SPARQLTestSuiteValidationReport,
360360
SHACLTestSuiteValidationReport]):
@@ -363,10 +363,10 @@ def set_distilled_rdf_validation(self, rdf_validation: Union[SPARQLTestSuiteVali
363363
:param rdf_validation:
364364
:return:
365365
"""
366-
if not self._distilled_rdf_manifestation:
366+
if not self.distilled_rdf_manifestation:
367367
raise ValueError("Cannot set the RDF validation of a non-existent RDF manifestation")
368368

369-
self._distilled_rdf_manifestation.add_validation(validation=rdf_validation)
369+
self.distilled_rdf_manifestation.add_validation(validation=rdf_validation)
370370

371371
if self._check_status_is_validated():
372372
self.update_status_to(NoticeStatus.VALIDATED)
@@ -479,13 +479,17 @@ def update_status_to(self, new_status: NoticeStatus):
479479
raise UnsupportedStatusTransition(
480480
f"Unsupported transition from state {self._status} to state {new_status}.")
481481
elif self._status > new_status:
482-
# TODO: implement delete actions
483482
self._status = new_status
483+
if new_status < NoticeStatus.INDEXED:
484+
self.remove_lazy_field(Notice.xml_metadata)
484485
if new_status < NoticeStatus.NORMALISED_METADATA:
485-
self._normalised_metadata = None
486+
self.remove_lazy_field(Notice.normalised_metadata)
487+
# TODO: preprocessed_xml_manifestation is currently the same as xml_manifestation,
488+
# so deleting the preprocessed XML manifestation would also delete xml_manifestation.
489+
# In the future, remove the _preprocessed_xml_manifestation field from the model.
486490
self._preprocessed_xml_manifestation = None
487491
if new_status < NoticeStatus.TRANSFORMED:
488-
self._rdf_manifestation = None
489-
self._distilled_rdf_manifestation = None
492+
self.remove_lazy_field(Notice.rdf_manifestation)
493+
self.remove_lazy_field(Notice.distilled_rdf_manifestation)
490494
if new_status < NoticeStatus.PACKAGED:
491-
self._mets_manifestation = None
495+
self.remove_lazy_field(Notice.mets_manifestation)

ted_sws/data_manager/adapters/manifestation_repository.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@
1414
MANIFESTATION_ID = "manifestation_id"
1515
OBJECT_DATA_KEY = "object_data"
1616
AGGREGATE_REFERENCE_ID = "ted_id"
17+
MANIFESTATION_TYPE_ID = "manifestation_type"
1718

1819

1920
class BaseManifestationRepository(ManifestationRepositoryABC):
2021
_collection_name = "notice_manifestations"
22+
_manifestation_type = "unknown"
2123

2224
def __init__(self, mongodb_client: MongoClient, database_name: str = None):
2325
database_name = database_name if database_name else config.MONGO_DB_AGGREGATES_DATABASE_NAME
@@ -59,6 +61,7 @@ def _update_manifestation(self, reference: str, manifestation: Manifestation, up
5961
if manifestation is not None:
6062
manifestation_dict = manifestation.dict()
6163
manifestation_dict[AGGREGATE_REFERENCE_ID] = reference
64+
manifestation_dict[MANIFESTATION_TYPE_ID] = self._manifestation_type
6265
reference = self._build_reference(base_reference=reference)
6366
manifestation_dict[MONGODB_COLLECTION_ID] = reference
6467
old_linked_manifestation_file = self.file_storage.find_one({MANIFESTATION_ID: reference})
@@ -76,15 +79,16 @@ def _get_manifestation_dict(self, reference: str) -> Optional[dict]:
7679
result_dict[OBJECT_DATA_KEY] = self._get_file_content_from_grid_fs(file_id=result_dict[OBJECT_DATA_KEY])
7780
del result_dict[MONGODB_COLLECTION_ID]
7881
del result_dict[AGGREGATE_REFERENCE_ID]
82+
del result_dict[MANIFESTATION_TYPE_ID]
7983
return result_dict
8084

81-
@abc.abstractmethod
8285
def _build_reference(self, base_reference: str) -> str:
8386
"""
8487
8588
:param base_reference:
8689
:return:
8790
"""
91+
return f"{base_reference}_{self._manifestation_type}"
8892

8993
@abc.abstractmethod
9094
def _build_manifestation_from_dict(self, manifestation_dict: dict) -> Manifestation:
@@ -123,34 +127,36 @@ def get(self, reference: str) -> Optional[Manifestation]:
123127
return self._build_manifestation_from_dict(manifestation_dict=result_dict)
124128
return None
125129

130+
def remove(self, reference: str):
131+
"""
132+
This method removes a manifestation based on an identification reference.
133+
:param reference:
134+
:return:
135+
"""
136+
reference = self._build_reference(reference)
137+
self.collection.delete_one({MONGODB_COLLECTION_ID: reference})
126138

127-
class RDFManifestationRepository(BaseManifestationRepository):
128139

129-
def _build_reference(self, base_reference: str) -> str:
130-
return f"{base_reference}_rdf"
140+
class RDFManifestationRepository(BaseManifestationRepository):
141+
_manifestation_type: str = "rdf"
131142

132143
def _build_manifestation_from_dict(self, manifestation_dict: dict) -> Manifestation:
133144
return RDFManifestation(**manifestation_dict)
134145

135146

136147
class DistilledRDFManifestationRepository(RDFManifestationRepository):
137-
def _build_reference(self, base_reference: str) -> str:
138-
return f"{base_reference}_distilled_rdf"
148+
_manifestation_type: str = "distilled_rdf"
139149

140150

141151
class XMLManifestationRepository(BaseManifestationRepository):
142-
143-
def _build_reference(self, base_reference: str) -> str:
144-
return f"{base_reference}_xml"
152+
_manifestation_type: str = "xml"
145153

146154
def _build_manifestation_from_dict(self, manifestation_dict: dict) -> Manifestation:
147155
return XMLManifestation(**manifestation_dict)
148156

149157

150158
class METSManifestationRepository(BaseManifestationRepository):
151-
152-
def _build_reference(self, base_reference: str) -> str:
153-
return f"{base_reference}_mets"
159+
_manifestation_type: str = "mets"
154160

155161
def _build_manifestation_from_dict(self, manifestation_dict: dict) -> Manifestation:
156162
return METSManifestation(**manifestation_dict)

ted_sws/data_manager/adapters/metadata_repository.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,19 @@
55

66
from ted_sws import config
77
from ted_sws.core.model.metadata import Metadata, NormalisedMetadata, TEDMetadata, XMLMetadata
8-
from ted_sws.data_manager.adapters.repository_abc import RepositoryABC
8+
from ted_sws.data_manager.adapters.repository_abc import MetadataRepositoryABC
99

1010
MONGODB_COLLECTION_ID = "_id"
1111
AGGREGATE_REFERENCE_ID = "ted_id"
12+
METADATA_TYPE_ID = "metadata_type"
1213

1314

14-
class BaseMetadataRepository(RepositoryABC):
15+
class BaseMetadataRepository(MetadataRepositoryABC, abc.ABC):
1516
"""
1617
This repository is intended for storing Metadata objects.
1718
"""
18-
_collection_name = "notice_metadata"
19+
_collection_name: str = "notice_metadata"
20+
_metadata_type: str = "unknown"
1921

2022
def __init__(self, mongodb_client: MongoClient, database_name: str = None):
2123
database_name = database_name if database_name else config.MONGO_DB_AGGREGATES_DATABASE_NAME
@@ -24,6 +26,7 @@ def __init__(self, mongodb_client: MongoClient, database_name: str = None):
2426
db = mongodb_client[self._database_name]
2527
self.collection = db[self._collection_name]
2628
self.collection.create_index([(AGGREGATE_REFERENCE_ID, ASCENDING)])
29+
self.collection.create_index([(METADATA_TYPE_ID, ASCENDING)])
2730

2831
def _update_metadata(self, reference: str, metadata: Metadata, upsert: bool = False):
2932
"""
@@ -36,6 +39,7 @@ def _update_metadata(self, reference: str, metadata: Metadata, upsert: bool = Fa
3639
if metadata is not None:
3740
metadata_dict = metadata.dict()
3841
metadata_dict[AGGREGATE_REFERENCE_ID] = reference
42+
metadata_dict[METADATA_TYPE_ID] = self._metadata_type
3943
reference = self._build_reference(base_reference=reference)
4044
metadata_dict[MONGODB_COLLECTION_ID] = reference
4145
self.collection.update_one({MONGODB_COLLECTION_ID: reference}, {"$set": metadata_dict}, upsert=upsert)
@@ -51,21 +55,30 @@ def _get_metadata_dict(self, reference: str) -> Optional[dict]:
5155
if result_dict:
5256
del result_dict[MONGODB_COLLECTION_ID]
5357
del result_dict[AGGREGATE_REFERENCE_ID]
58+
del result_dict[METADATA_TYPE_ID]
5459
return result_dict
5560

56-
@abc.abstractmethod
5761
def _build_reference(self, base_reference: str) -> str:
5862
"""
5963
6064
:param base_reference:
6165
:return:
6266
"""
67+
return f"{base_reference}_{self._metadata_type}"
6368

6469

65-
class NormalisedMetadataRepository(BaseMetadataRepository):
70+
def remove(self, reference: str):
71+
"""
72+
This method removes a metadata object based on an identification reference.
73+
:param reference:
74+
:return:
75+
"""
76+
reference = self._build_reference(reference)
77+
self.collection.delete_one({MONGODB_COLLECTION_ID: reference})
6678

67-
def _build_reference(self, base_reference: str) -> str:
68-
return f"{base_reference}_normalised"
79+
80+
class NormalisedMetadataRepository(BaseMetadataRepository):
81+
_metadata_type: str = "normalised"
6982

7083
def add(self, reference: str, metadata: NormalisedMetadata):
7184
"""
@@ -100,9 +113,7 @@ def get(self, reference: str) -> Optional[NormalisedMetadata]:
100113

101114

102115
class TEDMetadataRepository(BaseMetadataRepository):
103-
104-
def _build_reference(self, base_reference: str) -> str:
105-
return f"{base_reference}_ted"
116+
_metadata_type: str = "ted"
106117

107118
def add(self, reference: str, metadata: TEDMetadata):
108119
"""
@@ -136,8 +147,7 @@ def get(self, reference: str) -> Optional[TEDMetadata]:
136147

137148

138149
class XMLMetadataRepository(BaseMetadataRepository):
139-
def _build_reference(self, base_reference: str) -> str:
140-
return f"{base_reference}_xml"
150+
_metadata_type: str = "xml"
141151

142152
def add(self, reference: str, metadata: XMLMetadata):
143153
"""

ted_sws/data_manager/adapters/notice_repository.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
NOTICE_METS_MANIFESTATION_PRIVATE_KEY = "_mets_manifestation"
5454

5555

56-
5756
class NoticeRepositoryInFileSystem(NoticeRepositoryABC):
5857
"""
5958
This repository is intended for storing Notice objects as JSON files in file system.
@@ -185,7 +184,7 @@ def _mapping_lazy_fields(self):
185184
self.xml_metadata_repository),
186185
Notice.xml_manifestation: (NOTICE_XML_MANIFESTATION_PRIVATE_KEY,
187186
self.xml_manifestation_repository),
188-
#@Note: preprocessed_xml_manifestation at the moment is same as xml_manifestation
187+
# @Note: preprocessed_xml_manifestation is currently the same as xml_manifestation,
189188
# so the same repository is used here; in the future a separate repository should be created
190189
Notice.preprocessed_xml_manifestation: (NOTICE_PREPROCESSED_XML_MANIFESTATION_KEY,
191190
self.xml_manifestation_repository),
@@ -209,6 +208,12 @@ def load_lazy_field(self, source_object: Notice, property_field: property) -> An
209208
notice_field_data = field_repository.get(source_object.ted_id)
210209
setattr(source_object, notice_field, notice_field_data)
211210

211+
def remove_lazy_field(self, source_object: Any, property_field: property):
212+
mapping_lazy_fields = self._mapping_lazy_fields()
213+
notice_field, field_repository = mapping_lazy_fields[property_field]
214+
field_repository.remove(source_object.ted_id)
215+
setattr(source_object, notice_field, None)
216+
212217
def _write_lazy_fields(self, notice: Notice):
213218
mapping_lazy_fields = self._mapping_lazy_fields()
214219
for notice_field, repository in mapping_lazy_fields.values():

ted_sws/data_manager/adapters/repository_abc.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ def get(self, reference: str) -> Optional[Metadata]:
4545
:return: Metadata
4646
"""
4747

48+
@abc.abstractmethod
49+
def remove(self, reference: str):
50+
"""
51+
This method removes a metadata object based on an identification reference.
52+
:param reference:
53+
:return:
54+
"""
4855

4956
class ManifestationRepositoryABC(RepositoryABC):
5057
"""
@@ -77,6 +84,13 @@ def get(self, reference: str) -> Optional[Manifestation]:
7784
:return: Manifestation
7885
"""
7986

87+
@abc.abstractmethod
88+
def remove(self, reference: str):
89+
"""
90+
This method removes a manifestation based on an identification reference.
91+
:param reference:
92+
:return:
93+
"""
8094

8195
class NoticeRepositoryABC(RepositoryABC):
8296
"""

ted_sws/data_sampler/services/notice_selectors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def get_notice_ids_by_normalised_metadata_field_value(field_name: str, field_val
2020
:return:
2121
"""
2222
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
23-
match_filter = {f"normalised_metadata.{field_name}": field_value, "xml_metadata": {"$ne": None}}
23+
match_filter = {f"normalised_metadata.{field_name}": field_value}
2424
if notice_filter:
2525
match_filter.update(notice_filter)
2626
notice_ids = list(notice_repository.collection.aggregate([

ted_sws/data_sampler/services/notice_xml_indexer.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ def get_minimal_set_of_xpaths_for_coverage_notices(notice_ids: List[str], mongod
139139
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
140140
while len(unique_notice_ids):
141141
tmp_result = list(notice_repository.xml_metadata_repository.collection.aggregate([
142+
{"$match": {"metadata_type": {"$eq": "xml"}}},
142143
{"$unwind": "$unique_xpaths"},
143144
{"$match": {
144145
"unique_xpaths": {"$nin": minimal_set_of_xpaths},
@@ -173,7 +174,8 @@ def get_minimal_set_of_notices_for_coverage_xpaths(notice_ids: List[str], mongod
173174
while len(unique_xpaths):
174175
tmp_result = list(notice_repository.xml_metadata_repository.collection.aggregate([
175176
{"$match": {
176-
"ted_id": {"$in": search_notices}
177+
"ted_id": {"$in": search_notices},
178+
"metadata_type": {"$eq": "xml"}
177179
}
178180
},
179181
{"$unwind": "$unique_xpaths"},
@@ -205,7 +207,10 @@ def get_unique_notices_id_covered_by_xpaths(xpaths: List[str], mongodb_client: M
205207
"""
206208
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
207209
results = list(notice_repository.xml_metadata_repository.collection.aggregate([
208-
{"$match": {"unique_xpaths": {"$in": xpaths}}},
210+
{"$match": {"unique_xpaths": {"$in": xpaths},
211+
"metadata_type": {"$eq": "xml"}
212+
}
213+
},
209214
{
210215
"$group": {"_id": None,
211216
"ted_ids": {"$push": "$ted_id"}
@@ -223,7 +228,10 @@ def get_unique_xpaths_covered_by_notices(notice_ids: List[str], mongodb_client:
223228
:return:
224229
"""
225230
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
226-
results = notice_repository.xml_metadata_repository.collection.aggregate([{"$match": {"ted_id": {"$in": notice_ids}}}], allowDiskUse=True)
231+
results = notice_repository.xml_metadata_repository.collection.aggregate([{"$match": {"ted_id": {"$in": notice_ids},
232+
"metadata_type": {"$eq":"xml"}
233+
}
234+
}], allowDiskUse=True)
227235
unique_xpaths = set()
228236
for result in results:
229237
if result["unique_xpaths"] is not None:

0 commit comments

Comments
 (0)