Skip to content

Commit 1095938

Browse files
author
Kolea Plesco
committed
Merge remote-tracking branch 'origin/main' into feature/TED-585
2 parents 3978658 + 3f070d5 commit 1095938

5 files changed

Lines changed: 26 additions & 36 deletions

File tree

dags/selector_retransform_process_orchestrator.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,17 +8,18 @@
88
from ted_sws.core.model.notice import NoticeStatus
99
from ted_sws.data_manager.adapters.notice_repository import NoticeRepository
1010
from ted_sws.event_manager.adapters.event_log_decorator import event_log
11+
from ted_sws.event_manager.adapters.event_logger import EventLogger
1112
from ted_sws.event_manager.model.event_message import TechnicalEventMessage, EventMessageMetadata, \
12-
EventMessageProcessType
13+
EventMessageProcessType, EventMessage
14+
from ted_sws.event_manager.services.logger_from_context import get_logger_from_dag_context
1315

1416
DAG_NAME = "selector_re_transform_process_orchestrator"
1517

16-
RE_TRANSFORM_TARGET_NOTICE_STATES = [NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION, NoticeStatus.NORMALISED_METADATA,
17-
NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION,
18+
RE_TRANSFORM_TARGET_NOTICE_STATES = [NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION,
1819
NoticeStatus.PREPROCESSED_FOR_TRANSFORMATION,
1920
NoticeStatus.INELIGIBLE_FOR_TRANSFORMATION, NoticeStatus.TRANSFORMED,
20-
NoticeStatus.DISTILLED,
21-
NoticeStatus.VALIDATED, NoticeStatus.INELIGIBLE_FOR_PACKAGING
21+
NoticeStatus.DISTILLED, NoticeStatus.VALIDATED,
22+
NoticeStatus.INELIGIBLE_FOR_PACKAGING
2223
]
2324

2425

@@ -33,13 +34,15 @@ def selector_re_transform_process_orchestrator():
3334
process_type=EventMessageProcessType.DAG, process_name=DAG_NAME
3435
))
3536
)
36-
def select_notices_for_re_transform_and_reset_status():
37+
def select_notices_for_re_transform_and_reset_status(**context_args):
38+
event_logger: EventLogger = get_logger_from_dag_context(context_args)
3739
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
3840
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
3941
for target_notice_state in RE_TRANSFORM_TARGET_NOTICE_STATES:
42+
event_logger.info(event_message=EventMessage(message=f"select notices with status : {target_notice_state}"))
4043
notices = notice_repository.get_notice_by_status(notice_status=target_notice_state)
4144
for notice in notices:
42-
notice.update_status_to(new_status=NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION)
45+
notice.update_status_to(new_status=NoticeStatus.NORMALISED_METADATA)
4346
notice_repository.update(notice=notice)
4447

4548
@task
@@ -53,7 +56,7 @@ def trigger_worker_for_transform_branch():
5356
context = get_current_context()
5457
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
5558
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
56-
notices = notice_repository.get_notice_by_status(notice_status=NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION)
59+
notices = notice_repository.get_notice_by_status(notice_status=NoticeStatus.NORMALISED_METADATA)
5760
for notice in notices:
5861
TriggerDagRunOperator(
5962
task_id=f'trigger_worker_dag_{notice.ted_id}',

dags/worker_single_notice_process_orchestrator.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -453,9 +453,9 @@ def _check_notice_state_before_notice_successfully_processed():
453453
state_skip_table = {
454454
NoticeStatus.RAW: "index_notice_xml_content",
455455
NoticeStatus.INDEXED: "index_notice_xml_content",
456-
NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION: "check_eligibility_for_transformation",
457-
NoticeStatus.ELIGIBLE_FOR_PACKAGING: "generate_mets_package",
458-
NoticeStatus.ELIGIBLE_FOR_PUBLISHING: "publish_notice_in_cellar",
456+
NoticeStatus.NORMALISED_METADATA: "check_eligibility_for_transformation",
457+
NoticeStatus.ELIGIBLE_FOR_PACKAGING: "check_notice_state_before_generate_mets_package",
458+
NoticeStatus.ELIGIBLE_FOR_PUBLISHING: "check_notice_state_before_publish_notice_in_cellar",
459459
}
460460

461461
def _get_task_run():

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,4 +19,5 @@ rdflib~=6.1.1
1919
pyshacl~=0.19.0
2020
agraph-python==101.0.10
2121
decorator~=5.1.1
22-
urllib3[secure]
22+
urllib3[secure]
23+
semantic-version==2.10.0

ted_sws/notice_metadata_processor/services/notice_eligibility.py

Lines changed: 9 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import datetime
22
from typing import Tuple
33

4+
import semantic_version
5+
46
from ted_sws.core.model.metadata import NormalisedMetadata
57
from ted_sws.core.model.notice import Notice
68
from ted_sws.core.model.transform import MappingSuite
@@ -35,17 +37,6 @@ def check_package(mapping_suite: MappingSuite, notice_metadata: NormalisedMetada
3537
return True if in_date_range and in_version_range and covered_eform_type else False
3638

3739

38-
def transform_version_string_into_int(version_string: str) -> int:
39-
"""
40-
Transforming a version string into a number. (example_version = "1.2.3")
41-
:param version_string:
42-
:return:
43-
"""
44-
version_numbers = [int(x) for x in version_string.split(".")]
45-
assert len(version_numbers) == 3
46-
return ((version_numbers[0] * 100) + version_numbers[1]) * 100 + version_numbers[2]
47-
48-
4940
def notice_eligibility_checker(notice: Notice, mapping_suite_repository: MappingSuiteRepositoryABC) -> Tuple:
5041
"""
5142
Check if notice is eligible for transformation
@@ -60,11 +51,13 @@ def notice_eligibility_checker(notice: Notice, mapping_suite_repository: Mapping
6051
possible_mapping_suites.append(mapping_suite)
6152

6253
if possible_mapping_suites:
63-
best_version = max([transform_version_string_into_int(version_string=mapping_suite.version) for mapping_suite in
64-
possible_mapping_suites])
65-
mapping_suite_identifier = next((mapping_suite.identifier for mapping_suite in possible_mapping_suites if
66-
transform_version_string_into_int(
67-
version_string=mapping_suite.version) == best_version), None)
54+
best_version = possible_mapping_suites[0].version
55+
mapping_suite_identifier = possible_mapping_suites[0].identifier
56+
for mapping_suite in possible_mapping_suites[1:]:
57+
if semantic_version.Version(mapping_suite.version) > semantic_version.Version(best_version):
58+
best_version = mapping_suite.version
59+
mapping_suite_identifier = mapping_suite.identifier
60+
6861
notice.set_is_eligible_for_transformation(eligibility=True)
6962
return notice.ted_id, mapping_suite_identifier
7063
else:

tests/unit/notice_metadata_processor/test_eligibility.py

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem
33
from ted_sws.notice_metadata_processor.services.metadata_normalizer import MetadataNormaliser
44
from ted_sws.notice_metadata_processor.services.notice_eligibility import check_package, \
5-
notice_eligibility_checker, notice_eligibility_checker_by_id, transform_version_string_into_int
5+
notice_eligibility_checker, notice_eligibility_checker_by_id
66

77

88
def test_non_eligibility_by_notice(notice_eligibility_repository_path, indexed_notice):
@@ -47,10 +47,3 @@ def test_check_mapping_suite(notice_eligibility_repository_path, normalised_meta
4747
is_valid = check_package(mapping_suite=mapping_suite_repository.get("test_package"),
4848
notice_metadata=normalised_metadata_object)
4949
assert not is_valid
50-
51-
52-
def test_transform_version_string_into_int():
53-
funky_version_string = "5.7.6"
54-
funky_version_int = transform_version_string_into_int(version_string=funky_version_string)
55-
assert isinstance(funky_version_int, int)
56-
assert funky_version_int == 50706

0 commit comments

Comments
 (0)