Skip to content

Commit 43990b8

Browse files
Merge pull request #421 from OP-TED/feature/TED-1149
add date as string field to overcome metabase limitations with docDB
2 parents d75719c + 56c5e1d commit 43990b8

9 files changed

Lines changed: 151 additions & 31 deletions

File tree

ted_sws/core/model/notice.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ class Config:
123123
validate_assignment = True
124124
orm_mode = True
125125

126-
created_at: str = datetime.now().isoformat()
126+
created_at: str = datetime.now().replace(microsecond=0).isoformat()
127127
version_number: int = 0
128128

129129
@property

ted_sws/core/model/transform.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ class MappingSuite(MappingSuiteComponent):
184184
"""
185185
186186
"""
187-
created_at: str = datetime.now().isoformat()
187+
created_at: str = datetime.now().replace(microsecond=0).isoformat()
188188
identifier: str = "no_id"
189189
title: str = "no_title"
190190
version: str = "0.1.1"
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Maps the suffix appended to a date field name -> strftime format used for the derived string field.
DEFAULT_DATE_STRING_FIELDS_SUFFIX_MAP = {"_str_y": "%Y",
                                         "_str_ym": "%Y-%m",
                                         "_str_ymd": "%Y-%m-%d"}


def inject_date_string_fields(data: dict, date_field_name: str, date_string_fields_suffix_map: dict = None):
    """
        This function adds supplementary date string fields with different formats.
        For every (suffix, format) pair in the suffix map, the key date_field_name + suffix
        is set in data to the date value formatted with strftime(format).
        When the date field is absent from data, or its value is None, nothing is injected,
        so optional date fields can be passed without a prior check.
    :param data: dictionary that holds the date value; it is modified in place
    :param date_field_name: key in data under which the date value is stored
    :param date_string_fields_suffix_map: mapping from field suffix to strftime format;
        DEFAULT_DATE_STRING_FIELDS_SUFFIX_MAP is used when None
    :return: None
    """
    if date_string_fields_suffix_map is None:
        date_string_fields_suffix_map = DEFAULT_DATE_STRING_FIELDS_SUFFIX_MAP
    date_value = data.get(date_field_name)
    if date_value is None:
        # An unset optional date (e.g. a not yet finished event) has no string representation.
        return
    for date_field_suffix, date_string_format in date_string_fields_suffix_map.items():
        data[date_field_name + date_field_suffix] = date_value.strftime(date_string_format)


def remove_date_string_fields(data: dict, date_field_name: str, date_string_fields_suffix_map: dict = None):
    """
        This function removes the supplementary date string fields.
        For every suffix in the suffix map, the key date_field_name + suffix is dropped from data;
        keys that are not present are ignored.
    :param data: dictionary to clean up; it is modified in place
    :param date_field_name: key of the date field whose derived string fields are removed
    :param date_string_fields_suffix_map: mapping whose keys are the field suffixes;
        DEFAULT_DATE_STRING_FIELDS_SUFFIX_MAP is used when None
    :return: None
    """
    if date_string_fields_suffix_map is None:
        date_string_fields_suffix_map = DEFAULT_DATE_STRING_FIELDS_SUFFIX_MAP
    for date_field_suffix in date_string_fields_suffix_map:
        data.pop(date_field_name + date_field_suffix, None)

ted_sws/data_manager/adapters/mapping_suite_repository.py

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
import os
33
import pathlib
44
import shutil
5+
from datetime import datetime
56
from typing import Iterator, List, Optional
67

78
from pymongo import MongoClient
89

910
from ted_sws import config
1011
from ted_sws.core.model.transform import MappingSuite, FileResource, TransformationRuleSet, SHACLTestSuite, \
1112
SPARQLTestSuite, MetadataConstraints, TransformationTestData, ConceptualMapping
13+
from ted_sws.data_manager.adapters import inject_date_string_fields, remove_date_string_fields
1214
from ted_sws.data_manager.adapters.repository_abc import MappingSuiteRepositoryABC
1315
from ted_sws.mapping_suite_processor.services.conceptual_mapping_reader import mapping_suite_read_conceptual_mapping
1416

@@ -23,6 +25,8 @@
2325
MS_CONCEPTUAL_MAPPING_FILE_NAME = "conceptual_mappings.xlsx"
2426
MS_OUTPUT_FOLDER_NAME = "output"
2527
MS_TEST_SUITE_REPORT = "test_suite_report"
28+
MS_CREATED_AT = "created_at"
29+
MONGODB_COLLECTION_ID = "_id"
2630

2731

2832
class MappingSuiteRepositoryMongoDB(MappingSuiteRepositoryABC):
@@ -43,15 +47,40 @@ def __init__(self, mongodb_client: MongoClient, database_name: str = None):
4347
notice_db = mongodb_client[self._database_name]
4448
self.collection = notice_db[self._collection_name]
4549

50+
def _create_dict_from_mapping_suite(self, mapping_suite: MappingSuite) -> dict:
    """
        This method creates the dictionary that is stored for a mapping suite object.
        The dictionary gets the MongoDB id of the mapping suite, the created_at ISO string
        is parsed into a datetime, and the supplementary date string fields are injected.
    :param mapping_suite: the mapping suite to serialise
    :return: the dictionary representation of the mapping suite
    """
    record = mapping_suite.dict()
    record[MONGODB_COLLECTION_ID] = mapping_suite.get_mongodb_id()
    created_at = datetime.fromisoformat(record[MS_CREATED_AT])
    record[MS_CREATED_AT] = created_at
    inject_date_string_fields(data=record, date_field_name=MS_CREATED_AT)
    return record
61+
62+
def _create_mapping_suite_from_dict(self, mapping_suite_dict: dict) -> Optional[MappingSuite]:
    """
        This method creates a mapping suite object from a stored dictionary.
        The MongoDB id and the supplementary date string fields are dropped and
        created_at is turned back into an ISO string; the dictionary is modified in place.
    :param mapping_suite_dict: the stored dictionary, may be None or empty
    :return: the mapping suite, or None when no dictionary content is given
    """
    if not mapping_suite_dict:
        return None
    mapping_suite_dict.pop(MONGODB_COLLECTION_ID, None)
    created_at = mapping_suite_dict[MS_CREATED_AT]
    mapping_suite_dict[MS_CREATED_AT] = created_at.isoformat()
    remove_date_string_fields(data=mapping_suite_dict, date_field_name=MS_CREATED_AT)
    return MappingSuite(**mapping_suite_dict)
74+
4675
def add(self, mapping_suite: MappingSuite):
4776
"""
4877
This method allows you to add MappingSuite objects to the repository.
4978
:param mapping_suite:
5079
:return:
5180
"""
52-
mapping_suite_dict = mapping_suite.dict()
53-
mapping_suite_dict["_id"] = mapping_suite.get_mongodb_id()
54-
mapping_suite_exist = self.collection.find_one({"_id": mapping_suite_dict["_id"]})
81+
mapping_suite_dict = self._create_dict_from_mapping_suite(mapping_suite=mapping_suite)
82+
mapping_suite_exist = self.collection.find_one(
83+
{MONGODB_COLLECTION_ID: mapping_suite_dict[MONGODB_COLLECTION_ID]})
5584
if mapping_suite_exist is None:
5685
self.collection.insert_one(mapping_suite_dict)
5786

@@ -61,26 +90,26 @@ def update(self, mapping_suite: MappingSuite):
6190
:param mapping_suite:
6291
:return:
6392
"""
64-
mapping_suite_dict = mapping_suite.dict()
65-
mapping_suite_dict["_id"] = mapping_suite.get_mongodb_id()
66-
self.collection.update_one({'_id': mapping_suite_dict["_id"]}, {"$set": mapping_suite_dict})
93+
mapping_suite_dict = self._create_dict_from_mapping_suite(mapping_suite=mapping_suite)
94+
self.collection.update_one({MONGODB_COLLECTION_ID: mapping_suite_dict[MONGODB_COLLECTION_ID]},
95+
{"$set": mapping_suite_dict})
6796

6897
def get(self, reference) -> MappingSuite:
6998
"""
7099
This method allows a MappingSuite to be obtained based on an identification reference.
71100
:param reference:
72101
:return: MappingSuite
73102
"""
74-
result_dict = self.collection.find_one({"_id": reference})
75-
return MappingSuite(**result_dict) if result_dict else None
103+
result_dict = self.collection.find_one({MONGODB_COLLECTION_ID: reference})
104+
return self._create_mapping_suite_from_dict(mapping_suite_dict=result_dict)
76105

77106
def list(self) -> Iterator[MappingSuite]:
78107
"""
79108
This method allows all records to be retrieved from the repository.
80109
:return: list of MappingSuites
81110
"""
82111
for result_dict in self.collection.find():
83-
yield MappingSuite(**result_dict)
112+
yield self._create_mapping_suite_from_dict(mapping_suite_dict=result_dict)
84113

85114

86115
class MappingSuiteRepositoryInFileSystem(MappingSuiteRepositoryABC):

ted_sws/data_manager/adapters/notice_repository.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from ted_sws.core.model.manifestation import XMLManifestation, RDFManifestation, METSManifestation, Manifestation
1414
from ted_sws.core.model.metadata import NormalisedMetadata
1515
from ted_sws.core.model.notice import Notice, NoticeStatus
16+
from ted_sws.data_manager.adapters import inject_date_string_fields, remove_date_string_fields
1617
from ted_sws.data_manager.adapters.repository_abc import NoticeRepositoryABC
1718

1819
logger = logging.getLogger(__name__)
@@ -130,12 +131,16 @@ def __init__(self, mongodb_client: MongoClient, database_name: str = None):
130131
self._database_name = database_name
131132
self.mongodb_client = mongodb_client
132133
notice_db = mongodb_client[self._database_name]
133-
self.file_storage = gridfs.GridFS(notice_db) # TODO: Investigate how it works in multiple processes in parallel.
134+
self.file_storage = gridfs.GridFS(
135+
notice_db) # TODO: Investigate how it works in multiple processes in parallel.
134136
self.collection = notice_db[self._collection_name]
135-
self.collection.create_index([(NOTICE_CREATED_AT, ASCENDING)]) # TODO: index creation may bring race condition error.
136-
self.collection.create_index([(NOTICE_STATUS, ASCENDING)]) # TODO: index creation may bring race condition error.
137+
self.collection.create_index(
138+
[(NOTICE_CREATED_AT, ASCENDING)]) # TODO: index creation may bring race condition error.
139+
self.collection.create_index(
140+
[(NOTICE_STATUS, ASCENDING)]) # TODO: index creation may bring race condition error.
137141
self.file_storage_collection = notice_db[FILE_STORAGE_COLLECTION_NAME]
138-
self.file_storage_collection.create_index([(NOTICE_ID, ASCENDING)]) # TODO: index creation may bring race condition error.
142+
self.file_storage_collection.create_index(
143+
[(NOTICE_ID, ASCENDING)]) # TODO: index creation may bring race condition error.
139144

140145
def get_file_content_from_grid_fs(self, file_id: str) -> str:
141146
"""
@@ -229,8 +234,13 @@ def date_field_to_string(date_field: datetime):
229234

230235
if notice_dict:
231236
del notice_dict[MONGODB_COLLECTION_ID]
237+
remove_date_string_fields(data=notice_dict, date_field_name=NOTICE_CREATED_AT)
232238
notice_dict[NOTICE_CREATED_AT] = notice_dict[NOTICE_CREATED_AT].isoformat()
233239
if notice_dict[NOTICE_NORMALISED_METADATA]:
240+
remove_date_string_fields(data=notice_dict[NOTICE_NORMALISED_METADATA],
241+
date_field_name=METADATA_PUBLICATION_DATE)
242+
remove_date_string_fields(data=notice_dict[NOTICE_NORMALISED_METADATA],
243+
date_field_name=METADATA_DOCUMENT_SENT_DATE)
234244
notice_dict[NOTICE_NORMALISED_METADATA][METADATA_PUBLICATION_DATE] = date_field_to_string(
235245
notice_dict[NOTICE_NORMALISED_METADATA][METADATA_PUBLICATION_DATE])
236246
notice_dict[NOTICE_NORMALISED_METADATA][METADATA_DOCUMENT_SENT_DATE] = date_field_to_string(
@@ -261,13 +271,19 @@ def _create_dict_from_notice(notice: Notice) -> dict:
261271
notice_dict[NOTICE_STATUS] = str(notice_dict[NOTICE_STATUS])
262272
notice_dict[NOTICE_CREATED_AT] = datetime.fromisoformat(notice_dict[NOTICE_CREATED_AT])
263273

274+
inject_date_string_fields(data=notice_dict, date_field_name=NOTICE_CREATED_AT)
275+
264276
if notice_dict[NOTICE_NORMALISED_METADATA]:
265277
if notice_dict[NOTICE_NORMALISED_METADATA][METADATA_PUBLICATION_DATE]:
266278
notice_dict[NOTICE_NORMALISED_METADATA][METADATA_PUBLICATION_DATE] = datetime.fromisoformat(
267279
notice_dict[NOTICE_NORMALISED_METADATA][METADATA_PUBLICATION_DATE])
280+
inject_date_string_fields(data=notice_dict[NOTICE_NORMALISED_METADATA],
281+
date_field_name=METADATA_PUBLICATION_DATE)
268282
if notice_dict[NOTICE_NORMALISED_METADATA][METADATA_DOCUMENT_SENT_DATE]:
269283
notice_dict[NOTICE_NORMALISED_METADATA][METADATA_DOCUMENT_SENT_DATE] = datetime.fromisoformat(
270284
notice_dict[NOTICE_NORMALISED_METADATA][METADATA_DOCUMENT_SENT_DATE])
285+
inject_date_string_fields(data=notice_dict[NOTICE_NORMALISED_METADATA],
286+
date_field_name=METADATA_DOCUMENT_SENT_DATE)
271287

272288
return notice_dict
273289

ted_sws/data_manager/adapters/supra_notice_repository.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55

66
from ted_sws import config
77
from ted_sws.core.model.supra_notice import DailySupraNotice
8+
from ted_sws.data_manager.adapters import inject_date_string_fields, remove_date_string_fields
89
from ted_sws.data_manager.adapters.repository_abc import DailySupraNoticeRepositoryABC
910

1011
DAILY_SUPRA_NOTICE_ID = "notice_fetched_date"
12+
DAILY_SUPRA_NOTICE_CREATED_AT = "created_at"
1113

1214

1315
class DailySupraNoticeRepository(DailySupraNoticeRepositoryABC):
@@ -22,12 +24,38 @@ def __init__(self, mongodb_client: MongoClient, database_name: str = None):
2224
self.mongodb_client = mongodb_client
2325
daily_supra_notice_db = mongodb_client[self._database_name]
2426
self.collection = daily_supra_notice_db[self._collection_name]
25-
self.collection.create_index([(DAILY_SUPRA_NOTICE_ID, ASCENDING)]) # TODO: index creation may bring race condition error.
27+
self.collection.create_index(
28+
[(DAILY_SUPRA_NOTICE_ID, ASCENDING)]) # TODO: index creation may bring race condition error.
2629

27-
def _update_daily_supra_notice(self, daily_supra_notice: DailySupraNotice, upsert: bool = False):
30+
def _create_dict_from_daily_supra_notice(self, daily_supra_notice: DailySupraNotice) -> dict:
    """
        This method creates the dictionary that is stored for a daily supra notice.
        The notice fetched date is combined with a default time() into a datetime, then the
        supplementary date string fields are injected for the fetched date and for created_at.
    :param daily_supra_notice: the daily supra notice to serialise
    :return: the dictionary representation of the daily supra notice
    """
    record = daily_supra_notice.dict()
    fetched_date = record[DAILY_SUPRA_NOTICE_ID]
    record[DAILY_SUPRA_NOTICE_ID] = datetime.combine(fetched_date, time())
    for date_field_name in (DAILY_SUPRA_NOTICE_ID, DAILY_SUPRA_NOTICE_CREATED_AT):
        inject_date_string_fields(data=record, date_field_name=date_field_name)
    return record
42+
43+
def _create_daily_supra_notice_from_dict(self, daily_supra_notice_dict: dict) -> Optional[DailySupraNotice]:
    """
        This method creates a daily supra notice object from a stored dictionary.
        The notice fetched date is reduced to its date part and the supplementary date string
        fields are dropped before parsing; the dictionary is modified in place.
    :param daily_supra_notice_dict: the stored dictionary, may be None
    :return: the daily supra notice, or None when the dictionary is None
    """
    if daily_supra_notice_dict is None:
        return None
    fetched_at = daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID]
    daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID] = fetched_at.date()
    for date_field_name in (DAILY_SUPRA_NOTICE_ID, DAILY_SUPRA_NOTICE_CREATED_AT):
        remove_date_string_fields(data=daily_supra_notice_dict, date_field_name=date_field_name)
    return DailySupraNotice.parse_obj(daily_supra_notice_dict)
56+
57+
def _update_daily_supra_notice(self, daily_supra_notice: DailySupraNotice, upsert: bool = False):
58+
daily_supra_notice_dict = self._create_dict_from_daily_supra_notice(daily_supra_notice=daily_supra_notice)
3159
self.collection.update_one({DAILY_SUPRA_NOTICE_ID: daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID]},
3260
{"$set": daily_supra_notice_dict}, upsert=upsert)
3361

@@ -55,16 +83,12 @@ def get(self, reference) -> Optional[DailySupraNotice]:
5583
"""
5684
reference = datetime.combine(reference, time())
5785
result_dict = self.collection.find_one({DAILY_SUPRA_NOTICE_ID: reference})
58-
if result_dict is not None:
59-
result_dict[DAILY_SUPRA_NOTICE_ID] = result_dict[DAILY_SUPRA_NOTICE_ID].date()
60-
daily_supra_notice = DailySupraNotice.parse_obj(result_dict)
61-
return daily_supra_notice
62-
return None
86+
return self._create_daily_supra_notice_from_dict(daily_supra_notice_dict=result_dict)
6387

6488
def list(self) -> Iterator[DailySupraNotice]:
6589
"""
6690
This method allows all records to be retrieved from the repository.
6791
:return: list of daily_supra_notices
6892
"""
6993
for result_dict in self.collection.find():
70-
yield DailySupraNotice(**result_dict)
94+
yield self._create_daily_supra_notice_from_dict(daily_supra_notice_dict=result_dict)

ted_sws/data_manager/services/create_notice_collection_materialised_view.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ def create_notice_collection_materialised_view(mongo_client: MongoClient):
2121
"$project": {
2222
"_id": True,
2323
"created_at": True,
24+
"created_at_str_y": True,
25+
"created_at_str_ym": True,
26+
"created_at_str_ymd": True,
2427
"status": True,
2528
"validation_summary": True,
2629
"version_number": True,
@@ -35,6 +38,9 @@ def create_notice_collection_materialised_view(mongo_client: MongoClient):
3538
"notice_type": "$normalised_metadata.notice_type",
3639
"xsd_version": "$normalised_metadata.xsd_version",
3740
"publication_date": "$normalised_metadata.publication_date",
41+
"publication_date_str_y": "$normalised_metadata.publication_date_str_y",
42+
"publication_date_str_ym": "$normalised_metadata.publication_date_str_ym",
43+
"publication_date_str_ymd": "$normalised_metadata.publication_date_str_ymd",
3844
}
3945
},
4046
{

ted_sws/event_manager/adapters/event_logging_repository.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,20 @@
33
from pymongo import MongoClient, ASCENDING, DESCENDING
44

55
from ted_sws import config
6+
from ted_sws.data_manager.adapters import inject_date_string_fields
67
from ted_sws.event_manager.model.event_message import EventMessage
78

89
"""
910
This module contains the event logging repository adapters.
1011
"""
1112

13+
LOGGING_DATE_FIELD_NAMES = ["created_at", "started_at", "ended_at"]
14+
LOGGING_DATE_STRING_FIELDS_SUFFIX_MAP = {"_str_y": "%Y",
15+
"_str_ym": "%Y-%m",
16+
"_str_ymd": "%Y-%m-%d",
17+
"_str_ymd_t": "%Y-%m-%d %H:%M:%S",
18+
}
19+
1220

1321
class EventLoggingRepositoryABC(abc.ABC):
1422
"""
@@ -58,11 +66,12 @@ def create_indexes(self):
5866
5967
:return: None
6068
"""
61-
try: # FIXME: This is temporary solution for exclude race condition error
62-
self.collection.create_index([("year", DESCENDING)]) # TODO: index creation may bring race condition error.
63-
self.collection.create_index([("month", ASCENDING)]) # TODO: index creation may bring race condition error.
64-
self.collection.create_index([("day", ASCENDING)]) # TODO: index creation may bring race condition error.
65-
self.collection.create_index([("caller_name", ASCENDING)]) # TODO: index creation may bring race condition error.
69+
try: # FIXME: This is temporary solution for exclude race condition error
70+
self.collection.create_index([("year", DESCENDING)]) # TODO: index creation may bring race condition error.
71+
self.collection.create_index([("month", ASCENDING)]) # TODO: index creation may bring race condition error.
72+
self.collection.create_index([("day", ASCENDING)]) # TODO: index creation may bring race condition error.
73+
self.collection.create_index(
74+
[("caller_name", ASCENDING)]) # TODO: index creation may bring race condition error.
6675
except:
6776
pass
6877

@@ -74,7 +83,13 @@ def prepare_record(cls, event_message: EventMessage) -> dict:
7483
:param event_message: The event message
7584
:return: The event message dict
7685
"""
77-
return event_message.dict()
86+
87+
event_message_dict = event_message.dict()
88+
for event_date_field_name in LOGGING_DATE_FIELD_NAMES:
89+
inject_date_string_fields(data=event_message_dict, date_field_name=event_date_field_name,
90+
date_string_fields_suffix_map=LOGGING_DATE_STRING_FIELDS_SUFFIX_MAP
91+
)
92+
return event_message_dict
7893

7994
def get_database_name(self) -> str:
8095
"""

tox.ini

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ deps =
1313
-rrequirements.dev.txt
1414

1515
commands =
16-
{envpython} -m pytest
16+
{envpython} -m pytest --cov-config=tox.ini
1717

1818
[testenv:unit]
1919
description = Running UNIT tests in the corresponding environment
@@ -57,7 +57,6 @@ addopts =
5757
--cov-report=html
5858
--cov-report=term
5959
--cov-report=xml
60-
--cov-config=tox.ini
6160
--junitxml=junit_report.xml
6261
-r a
6362
-q

0 commit comments

Comments
 (0)