Skip to content

Commit 0bcd95a

Browse files
Merge pull request #404 from OP-TED/feature/TED-1040
Feature/ted 1040
2 parents 6983752 + 27a6897 commit 0bcd95a

13 files changed

Lines changed: 119 additions & 35 deletions
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88
from ted_sws.data_manager.services.create_notice_collection_materialised_view import \
99
create_notice_collection_materialised_view, create_notice_kpi_collection
1010

11-
DAG_NAME = "daily_materialized_view_update"
11+
DAG_NAME = "daily_materialized_views_update"
1212

1313

1414
@dag(default_args=DEFAULT_DAG_ARGUMENTS,
1515
catchup=False,
1616
schedule_interval="0 6 * * *",
1717
tags=['mongodb', 'daily-views-update'])
18-
def daily_materialized_view_update():
18+
def daily_materialized_views_update():
1919
@task
2020
def create_materialised_view():
2121
mongo_client = MongoClient(config.MONGO_DB_AUTH_URL)
@@ -34,4 +34,4 @@ def aggregate_batch_logs():
3434
create_materialised_view() >> create_kpi_collection_for_notices() >> aggregate_batch_logs()
3535

3636

37-
dag = daily_materialized_view_update()
37+
dag = daily_materialized_views_update()
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from ted_sws.event_manager.model.event_message import TechnicalEventMessage, EventMessageMetadata, \
1515
EventMessageProcessType
1616

17-
DAG_NAME = "notice_fetch_by_date_workflow"
17+
DAG_NAME = "fetch_notices_by_date"
1818
BATCH_SIZE = 2000
1919
WILD_CARD_DAG_KEY = "wild_card"
2020
TRIGGER_COMPLETE_WORKFLOW_DAG_KEY = "trigger_complete_workflow"
@@ -29,7 +29,7 @@
2929
catchup=False,
3030
timetable=CronTriggerTimetable('0 1 * * *', timezone='UTC'),
3131
tags=['selector', 'daily-fetch'])
32-
def notice_fetch_by_date_workflow():
32+
def fetch_notices_by_date():
3333
@task
3434
@event_log(TechnicalEventMessage(
3535
message="fetch_notice_from_ted",
@@ -93,4 +93,4 @@ def _branch_selector():
9393
trigger_complete_workflow] >> validate_fetched_notices_step >> finish_step
9494

9595

96-
dag = notice_fetch_by_date_workflow()
96+
dag = fetch_notices_by_date()

dags/notice_fetch_for_date_range_orchestrator.py renamed to dags/fetch_notices_by_date_range.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88

99
from dags import DEFAULT_DAG_ARGUMENTS
1010
from dags.dags_utils import get_dag_param
11-
from dags.notice_fetch_by_date_workflow import WILD_CARD_DAG_KEY, TRIGGER_COMPLETE_WORKFLOW_DAG_KEY
11+
from dags.fetch_notices_by_date import WILD_CARD_DAG_KEY, TRIGGER_COMPLETE_WORKFLOW_DAG_KEY
1212
from ted_sws.event_manager.adapters.event_log_decorator import event_log
1313
from ted_sws.event_manager.model.event_message import TechnicalEventMessage, EventMessageMetadata, \
1414
EventMessageProcessType
1515

16-
DAG_NAME = "notice_fetch_for_date_range_orchestrator"
16+
DAG_NAME = "fetch_notices_by_date_range"
1717

1818
START_DATE_KEY = "start_date"
1919
END_DATE_KEY = "end_date"
@@ -33,7 +33,7 @@ def generate_wildcards_foreach_day_in_range(start_date: str, end_date: str) -> l
3333

3434

3535
@dag(default_args=DEFAULT_DAG_ARGUMENTS, schedule_interval=None, tags=['master'])
36-
def notice_fetch_for_date_range_orchestrator():
36+
def fetch_notices_by_date_range():
3737
@task
3838
@event_log(TechnicalEventMessage(
3939
message="trigger_fetch_notices_workers_for_date_range",
@@ -59,4 +59,4 @@ def trigger_notice_by_date_for_each_date_in_range():
5959
trigger_notice_by_date_for_each_date_in_range()
6060

6161

62-
dag = notice_fetch_for_date_range_orchestrator()
62+
dag = fetch_notices_by_date_range()

dags/fetch_notices_by_query.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
from airflow.decorators import dag, task
from airflow.operators.dummy import DummyOperator
from airflow.operators.python import BranchPythonOperator
from airflow.utils.trigger_rule import TriggerRule
from dags import DEFAULT_DAG_ARGUMENTS
from dags.dags_utils import get_dag_param, push_dag_downstream, pull_dag_upstream
from dags.operators.DagBatchPipelineOperator import NOTICE_IDS_KEY, TriggerNoticeBatchPipelineOperator
from dags.pipelines.notice_fetcher_pipelines import notice_fetcher_by_query_pipeline
from ted_sws.event_manager.adapters.event_log_decorator import event_log
from ted_sws.event_manager.model.event_message import TechnicalEventMessage, EventMessageMetadata, \
    EventMessageProcessType

DAG_NAME = "fetch_notices_by_query"
BATCH_SIZE = 2000
QUERY_DAG_KEY = "query"
TRIGGER_COMPLETE_WORKFLOW_DAG_KEY = "trigger_complete_workflow"
TRIGGER_PARTIAL_WORKFLOW_TASK_ID = "trigger_partial_notice_proc_workflow"
TRIGGER_COMPLETE_WORKFLOW_TASK_ID = "trigger_complete_notice_proc_workflow"
CHECK_IF_TRIGGER_COMPLETE_WORKFLOW_TASK_ID = "check_if_trigger_complete_workflow"
# Fixed copy-paste naming from the fetch-by-date DAG: this DAG fetches by query,
# and the task id value was already "finish_fetch_by_query".
FINISH_FETCH_BY_QUERY_TASK_ID = "finish_fetch_by_query"
# Backward-compatible alias for any caller that imported the old (misnamed) constant.
FINISH_FETCH_BY_DATE_TASK_ID = FINISH_FETCH_BY_QUERY_TASK_ID


@dag(default_args=DEFAULT_DAG_ARGUMENTS,
     catchup=False,
     tags=['fetch'])
def fetch_notices_by_query():
    """Fetch notices from the TED API using a caller-supplied query, then branch:
    either trigger the complete notice-processing workflow (default) or only the
    partial (normalisation-only) workflow, depending on the
    ``trigger_complete_workflow`` DAG param.
    """

    @task
    @event_log(TechnicalEventMessage(
        message="fetch_by_query_notice_from_ted",
        metadata=EventMessageMetadata(
            process_type=EventMessageProcessType.DAG, process_name=DAG_NAME
        ))
    )
    def fetch_by_query_notice_from_ted():
        """Run the fetch pipeline for the mandatory ``query`` DAG param and push
        the fetched notice ids downstream via XCom.

        Raises if the pipeline returns no notice ids, so the run fails loudly
        instead of triggering downstream workflows with an empty batch.
        """
        notice_ids = notice_fetcher_by_query_pipeline(query=get_dag_param(key=QUERY_DAG_KEY, raise_error=True))
        if not notice_ids:
            raise Exception("No notices have been fetched!")
        push_dag_downstream(key=NOTICE_IDS_KEY, value=notice_ids)

    # Runs the full processing workflow for the fetched notices.
    trigger_complete_workflow = TriggerNoticeBatchPipelineOperator(task_id=TRIGGER_COMPLETE_WORKFLOW_TASK_ID,
                                                                   execute_only_one_step=False
                                                                   )
    # Runs only the first (normalisation) step, in batches of BATCH_SIZE notices.
    trigger_normalisation_workflow = TriggerNoticeBatchPipelineOperator(
        task_id=TRIGGER_PARTIAL_WORKFLOW_TASK_ID,
        batch_size=BATCH_SIZE,
        execute_only_one_step=True)

    def _branch_selector():
        """Choose which trigger task to run; defaults to the complete workflow
        when the ``trigger_complete_workflow`` DAG param is absent.

        Also re-pushes the notice ids so the selected branch can pull them.
        """
        trigger_complete_workflow = get_dag_param(key=TRIGGER_COMPLETE_WORKFLOW_DAG_KEY,
                                                  default_value=True)
        push_dag_downstream(key=NOTICE_IDS_KEY, value=pull_dag_upstream(key=NOTICE_IDS_KEY))
        if trigger_complete_workflow:
            return [TRIGGER_COMPLETE_WORKFLOW_TASK_ID]
        return [TRIGGER_PARTIAL_WORKFLOW_TASK_ID]

    branch_task = BranchPythonOperator(
        task_id=CHECK_IF_TRIGGER_COMPLETE_WORKFLOW_TASK_ID,
        python_callable=_branch_selector,
    )

    # NONE_FAILED_MIN_ONE_SUCCESS lets the DAG finish cleanly even though the
    # branch operator skips one of the two upstream trigger tasks.
    finish_step = DummyOperator(task_id=FINISH_FETCH_BY_QUERY_TASK_ID,
                                trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)

    fetch_by_query_notice_from_ted() >> branch_task >> [trigger_normalisation_workflow,
                                                        trigger_complete_workflow] >> finish_step


dag = fetch_notices_by_query()
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
@dag(default_args=DEFAULT_DAG_ARGUMENTS,
3232
schedule_interval=None,
3333
tags=['fetch', 'mapping-suite', 'github'])
34-
def load_mapping_suite_in_mongodb():
34+
def load_mapping_suite_in_database():
3535
@task
3636
@event_log(is_loggable=False)
3737
def fetch_mapping_suite_package_from_github_into_mongodb(**context_args):
@@ -86,4 +86,4 @@ def _branch_selector():
8686
branch_task >> [trigger_document_proc_pipeline, finish_step]
8787

8888

89-
dag = load_mapping_suite_in_mongodb()
89+
dag = load_mapping_suite_in_database()
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
SELECTOR_BRANCH_BEFORE_VALIDATION_TASK_ID = "switch_to_validation"
2525
SELECTOR_BRANCH_BEFORE_PACKAGE_TASK_ID = "switch_to_package"
2626
SELECTOR_BRANCH_BEFORE_PUBLISH_TASK_ID = "switch_to_publish"
27-
DAG_NAME = "notice_process_workflow"
27+
DAG_NAME = "notice_processing_pipeline"
2828

2929
BRANCH_SELECTOR_MAP = {NOTICE_NORMALISATION_PIPELINE_TASK_ID: NOTICE_NORMALISATION_PIPELINE_TASK_ID,
3030
NOTICE_TRANSFORMATION_PIPELINE_TASK_ID: SELECTOR_BRANCH_BEFORE_TRANSFORMATION_TASK_ID,
@@ -49,7 +49,7 @@ def branch_selector(result_branch: str, xcom_forward_keys: List[str] = [NOTICE_I
4949
max_active_runs=256,
5050
max_active_tasks=256,
5151
tags=['worker', 'pipeline'])
52-
def notice_process_workflow():
52+
def notice_processing_pipeline():
5353
"""
5454
5555
"""
@@ -149,4 +149,4 @@ def _stop_processing():
149149
notice_package_step >> selector_branch_before_publish >> notice_publish_step
150150

151151

152-
dag = notice_process_workflow()
152+
dag = notice_processing_pipeline()

dags/pipelines/notice_fetcher_pipelines.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from datetime import datetime, timedelta
22
from typing import List
33

4+
45
def notice_fetcher_by_date_pipeline(date_wild_card: str = None) -> List[str]:
56
from pymongo import MongoClient
67
from ted_sws import config
@@ -21,3 +22,18 @@ def notice_fetcher_by_date_pipeline(date_wild_card: str = None) -> List[str]:
2122
notice_fetched_date=notice_publication_date)
2223

2324
return notice_ids
25+
26+
27+
def notice_fetcher_by_query_pipeline(query: str = None) -> List[str]:
    """Fetch TED notices matching *query* via the TED API and return their ids.

    The query string is wrapped as ``{"q": query}`` before being handed to the
    TED API adapter. The fetcher is wired to a MongoDB-backed notice repository,
    so fetched notices are presumably persisted there as a side effect — TODO
    confirm against NoticeFetcher's implementation.
    """
    # Imports are deliberately function-local (matching the sibling pipeline
    # functions in this module) so importing the module stays cheap.
    from pymongo import MongoClient
    from ted_sws import config
    from ted_sws.data_manager.adapters.notice_repository import NoticeRepository
    from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI
    from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher

    mongo_client = MongoClient(config.MONGO_DB_AUTH_URL)
    repository = NoticeRepository(mongodb_client=mongo_client)
    api_adapter = TedAPIAdapter(request_api=TedRequestAPI())
    fetcher = NoticeFetcher(notice_repository=repository, ted_api_adapter=api_adapter)
    return fetcher.fetch_notices_by_query(query={"q": query})

dags/selector_raw_notices_process_orchestrator.py renamed to dags/reprocess_unnormalised_notices_from_backlog.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from ted_sws.event_manager.model.event_message import TechnicalEventMessage, EventMessageMetadata, \
99
EventMessageProcessType
1010

11-
DAG_NAME = "selector_raw_notices_process_orchestrator"
11+
DAG_NAME = "reprocess_unnormalised_notices_from_backlog"
1212

1313
TRIGGER_NOTICE_PROCESS_WORKFLOW_TASK_ID = "trigger_notice_process_workflow"
1414
FORM_NUMBER_DAG_PARAM = "form_number"
@@ -20,7 +20,7 @@
2020
@dag(default_args=DEFAULT_DAG_ARGUMENTS,
2121
schedule_interval=None,
2222
tags=['selector', 'raw-notices'])
23-
def selector_raw_notices_process_orchestrator():
23+
def reprocess_unnormalised_notices_from_backlog():
2424
@task
2525
@event_log(TechnicalEventMessage(
2626
message="select_all_raw_notices",
@@ -41,4 +41,4 @@ def select_all_raw_notices():
4141
select_all_raw_notices() >> trigger_notice_process_workflow
4242

4343

44-
dag = selector_raw_notices_process_orchestrator()
44+
dag = reprocess_unnormalised_notices_from_backlog()

dags/selector_repackage_process_orchestrator.py renamed to dags/reprocess_unpackaged_notices_from_backlog.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22

33
from dags import DEFAULT_DAG_ARGUMENTS
44
from dags.dags_utils import push_dag_downstream, get_dag_param
5-
from dags.notice_process_workflow import NOTICE_PACKAGE_PIPELINE_TASK_ID
5+
from dags.notice_processing_pipeline import NOTICE_PACKAGE_PIPELINE_TASK_ID
66
from dags.operators.DagBatchPipelineOperator import NOTICE_IDS_KEY, TriggerNoticeBatchPipelineOperator
77
from dags.pipelines.notice_selectors_pipelines import notice_ids_selector_by_status
88
from ted_sws.core.model.notice import NoticeStatus
99
from ted_sws.event_manager.adapters.event_log_decorator import event_log
1010
from ted_sws.event_manager.model.event_message import TechnicalEventMessage, EventMessageMetadata, \
1111
EventMessageProcessType
1212

13-
DAG_NAME = "selector_re_package_process_orchestrator"
13+
DAG_NAME = "reprocess_unpackaged_notices_from_backlog"
1414

1515
RE_PACKAGE_TARGET_NOTICE_STATES = [NoticeStatus.VALIDATED, NoticeStatus.INELIGIBLE_FOR_PACKAGING,
1616
NoticeStatus.ELIGIBLE_FOR_PACKAGING,
@@ -25,7 +25,7 @@
2525
@dag(default_args=DEFAULT_DAG_ARGUMENTS,
2626
schedule_interval=None,
2727
tags=['selector', 're-package'])
28-
def selector_re_package_process_orchestrator():
28+
def reprocess_unpackaged_notices_from_backlog():
2929
@task
3030
@event_log(TechnicalEventMessage(
3131
message="select_notices_for_re_package",
@@ -50,4 +50,4 @@ def select_notices_for_re_package():
5050
select_notices_for_re_package() >> trigger_notice_process_workflow
5151

5252

53-
dag = selector_re_package_process_orchestrator()
53+
dag = reprocess_unpackaged_notices_from_backlog()

dags/selector_republish_process_orchestrator.py renamed to dags/reprocess_unpublished_notices_from_backlog.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from dags import DEFAULT_DAG_ARGUMENTS
44
from dags.dags_utils import push_dag_downstream, get_dag_param
5-
from dags.notice_process_workflow import NOTICE_PUBLISH_PIPELINE_TASK_ID
5+
from dags.notice_processing_pipeline import NOTICE_PUBLISH_PIPELINE_TASK_ID
66
from dags.operators.DagBatchPipelineOperator import NOTICE_IDS_KEY, TriggerNoticeBatchPipelineOperator, \
77
EXECUTE_ONLY_ONE_STEP_KEY
88
from dags.pipelines.notice_selectors_pipelines import notice_ids_selector_by_status
@@ -11,7 +11,7 @@
1111
from ted_sws.event_manager.model.event_message import TechnicalEventMessage, EventMessageMetadata, \
1212
EventMessageProcessType
1313

14-
DAG_NAME = "selector_re_publish_process_orchestrator"
14+
DAG_NAME = "reprocess_unpublished_notices_from_backlog"
1515

1616
RE_PUBLISH_TARGET_NOTICE_STATES = [NoticeStatus.ELIGIBLE_FOR_PUBLISHING, NoticeStatus.INELIGIBLE_FOR_PUBLISHING,
1717
NoticeStatus.PACKAGED
@@ -26,7 +26,7 @@
2626
@dag(default_args=DEFAULT_DAG_ARGUMENTS,
2727
schedule_interval=None,
2828
tags=['selector', 're-publish'])
29-
def selector_re_publish_process_orchestrator():
29+
def reprocess_unpublished_notices_from_backlog():
3030
@task
3131
@event_log(TechnicalEventMessage(
3232
message="select_notices_for_re_publish",
@@ -51,4 +51,4 @@ def select_notices_for_re_publish():
5151
select_notices_for_re_publish() >> trigger_notice_process_workflow
5252

5353

54-
etl_dag = selector_re_publish_process_orchestrator()
54+
etl_dag = reprocess_unpublished_notices_from_backlog()

0 commit comments

Comments (0)