Skip to content

Commit b954983

Browse files
committed
add tests for daily materialised view dag scheduling
1 parent 76a1c2b commit b954983

4 files changed

Lines changed: 95 additions & 1 deletion

File tree

dags/daily_materialized_views_update.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from ted_sws.data_manager.services.create_notice_collection_materialised_view import \
99
create_notice_collection_materialised_view, create_notice_kpi_collection
1010

11-
DAG_NAME = "daily_materialized_views_update"
11+
DAILY_MATERIALISED_VIEWS_DAG_NAME = "daily_materialized_views_update"
1212

1313

1414
@dag(default_args=DEFAULT_DAG_ARGUMENTS,

ted_sws/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
SPARQL_PREFIXES_PATH = PROJECT_PATH / "resources" / "prefixes" / "prefixes.json"
3737

3838
DAG_FETCH_DEFAULT_TIMETABLE = "0 1 * * *"
39+
DAG_MATERIALIZED_VIEW_UPDATE_DEFAULT_TIMETABLE = "0 6 * * *"
3940

4041
class MongoDBConfig:
4142

@@ -270,6 +271,9 @@ class DagSchedulingConfig:
270271
def SCHEDULE_DAG_FETCH(self, config_value: str) -> str:
271272
return config_value
272273

274+
# Configuration property holding the schedule value for the materialised-view
# update DAG. The value is supplied by `env_property` through
# `AirflowAndEnvConfigResolver`; when nothing is configured, the default
# DAG_MATERIALIZED_VIEW_UPDATE_DEFAULT_TIMETABLE ("0 6 * * *") is used.
# NOTE(review): presumably the resolver checks Airflow Variables and then
# environment variables - confirm the lookup order against the resolver class.
@env_property(config_resolver_class=AirflowAndEnvConfigResolver, default_value=DAG_MATERIALIZED_VIEW_UPDATE_DEFAULT_TIMETABLE)
275+
def SCHEDULE_DAG_MATERIALIZED_VIEW_UPDATE(self, config_value: str) -> str:
276+
# The resolved value is returned unchanged; no validation happens here.
return config_value
273277

274278
class TedConfigResolver(MongoDBConfig, RMLMapperConfig, XMLProcessorConfig, ELKConfig, LoggingConfig,
275279
GitHubArtefacts, API, AllegroConfig, TedAPIConfig, SFTPConfig, FusekiConfig,

tests/unit/dags/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from airflow.utils import db
88
from psutil.tests import pytest
99

10+
from dags.daily_materialized_views_update import DAILY_MATERIALISED_VIEWS_DAG_NAME
1011
from dags.fetch_notices_by_date import FETCHER_DAG_NAME
1112
from tests import TESTS_PATH
1213

@@ -36,6 +37,10 @@ def dag_bag():
3637
def fetcher_dag_id():
3738
return FETCHER_DAG_NAME
3839

40+
# Fixture exposing the id of the daily materialised-views DAG to the tests.
@pytest.fixture
41+
def daily_materialised_views_dag_id():
42+
# Constant imported from dags.daily_materialized_views_update.
return DAILY_MATERIALISED_VIEWS_DAG_NAME
43+
3944

4045
@pytest.fixture
4146
def example_cron_table() -> str:
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import os
2+
3+
from airflow import DAG
4+
from airflow.models import DagBag, Variable
5+
from airflow.timetables.trigger import CronTriggerTimetable
6+
7+
from ted_sws import DAG_MATERIALIZED_VIEW_UPDATE_DEFAULT_TIMETABLE
8+
9+
10+
def test_daily_materialised_view_change_timetable_from_airflow_variable_after_reparse(
        dag_bag: DagBag,
        dag_materialised_view_update_schedule_variable_name: str,
        daily_materialised_views_dag_id: str,
        example_dag_cron_table: CronTriggerTimetable,
        airflow_timetable_import_error_name: str):
    """Check that the DAG picks up a new schedule from an Airflow Variable.

    The DAG must initially have a schedule different from the example cron
    expression. After the Airflow Variable is set and the DAG bag is forced
    to re-parse, the DAG's ``schedule_interval`` must equal the example
    expression and no timetable import error may be reported.

    The Airflow Variable is restored to its previous state (or deleted if it
    did not exist) even when an assertion fails, so the override does not
    leak into other tests.
    """
    variable_name = dag_materialised_view_update_schedule_variable_name
    new_cron_expression = example_dag_cron_table._expression

    daily_materialised_view_dag: DAG = dag_bag.get_dag(dag_id=daily_materialised_views_dag_id)
    assert daily_materialised_view_dag is not None
    assert daily_materialised_view_dag.schedule_interval != new_cron_expression

    # Remember the current value so the shared Airflow DB can be put back as it was.
    previous_value = Variable.get(key=variable_name, default_var=None)
    Variable.set(key=variable_name, value=new_cron_expression)
    try:
        # only_if_updated=False forces a re-parse although the DAG file is unchanged.
        dag_bag.collect_dags(only_if_updated=False)

        daily_materialised_view_dag = dag_bag.get_dag(dag_id=daily_materialised_views_dag_id)
        assert daily_materialised_view_dag is not None
        assert daily_materialised_view_dag.schedule_interval == new_cron_expression

        assert all(airflow_timetable_import_error_name not in error
                   for error in dag_bag.import_errors.values())
    finally:
        if previous_value is None:
            Variable.delete(key=variable_name)
        else:
            Variable.set(key=variable_name, value=previous_value)
27+
28+
29+
def test_daily_materialised_view_change_timetable_from_env_variable_after_reparse(
        dag_bag: DagBag,
        dag_materialised_view_update_schedule_variable_name: str,
        daily_materialised_views_dag_id: str,
        example_dag_cron_table: CronTriggerTimetable,
        airflow_timetable_import_error_name: str):
    """Check that the DAG picks up a new schedule from an environment variable.

    The DAG must initially have a schedule different from the example cron
    expression. After the environment variable is set and the DAG bag is
    forced to re-parse, the DAG's ``schedule_interval`` must equal the example
    expression and no timetable import error may be reported.

    The environment variable is restored to its previous value (or removed if
    it was not set) even when an assertion fails.
    """
    variable_name = dag_materialised_view_update_schedule_variable_name
    new_cron_expression = example_dag_cron_table._expression

    daily_materialised_view_dag: DAG = dag_bag.get_dag(dag_id=daily_materialised_views_dag_id)
    assert daily_materialised_view_dag is not None
    assert daily_materialised_view_dag.schedule_interval != new_cron_expression

    # Keep any pre-existing value so the process environment is left untouched.
    previous_value = os.environ.get(variable_name)
    os.environ[variable_name] = new_cron_expression
    try:
        # only_if_updated=False forces a re-parse although the DAG file is unchanged.
        dag_bag.collect_dags(only_if_updated=False)

        daily_materialised_view_dag = dag_bag.get_dag(dag_id=daily_materialised_views_dag_id)
        assert daily_materialised_view_dag is not None
        assert daily_materialised_view_dag.schedule_interval == new_cron_expression

        assert all(airflow_timetable_import_error_name not in error
                   for error in dag_bag.import_errors.values())
    finally:
        if previous_value is None:
            os.environ.pop(variable_name, None)
        else:
            os.environ[variable_name] = previous_value
47+
48+
49+
def test_daily_materialised_view_has_default_timetable_if_no_variable_is_set_after_reparse(
        dag_bag: DagBag,
        dag_materialised_view_update_schedule_variable_name: str,
        daily_materialised_views_dag_id: str,
        airflow_timetable_import_error_name: str):
    """Check that the DAG falls back to the default schedule.

    Both the environment variable and the Airflow Variable are removed, the
    DAG bag is forced to re-parse, and the DAG's ``schedule_interval`` must
    then equal ``DAG_MATERIALIZED_VIEW_UPDATE_DEFAULT_TIMETABLE`` with no
    timetable import error reported.

    Whatever values were present beforehand are restored afterwards, even
    when an assertion fails.
    """
    variable_name = dag_materialised_view_update_schedule_variable_name

    # Pop both sources of the setting, remembering what was there.
    env_var_value = os.environ.pop(variable_name, None)
    airflow_var_value = Variable.get(key=variable_name, default_var=None)
    if airflow_var_value is not None:
        Variable.delete(key=variable_name)

    try:
        # Force the re-parse, as the sibling tests do; without
        # only_if_updated=False an unchanged DAG file may be skipped.
        dag_bag.collect_dags(only_if_updated=False)

        daily_materialised_view_dag: DAG = dag_bag.get_dag(dag_id=daily_materialised_views_dag_id)
        assert daily_materialised_view_dag is not None
        assert daily_materialised_view_dag.schedule_interval == DAG_MATERIALIZED_VIEW_UPDATE_DEFAULT_TIMETABLE
        assert all(airflow_timetable_import_error_name not in error
                   for error in dag_bag.import_errors.values())
    finally:
        if env_var_value is not None:
            os.environ[variable_name] = env_var_value
        if airflow_var_value is not None:
            Variable.set(key=variable_name, value=airflow_var_value)
72+
73+
74+
def test_daily_materialised_view_gets_incorrect_timetable_after_reparse(
        dag_bag: DagBag,
        dag_materialised_view_update_schedule_variable_name: str,
        daily_materialised_views_dag_id: str,
        example_wrong_cron_table: str,
        airflow_timetable_import_error_name: str):
    """Check that an invalid cron expression surfaces as a DAG import error.

    The Airflow Variable is set to an invalid cron expression and the DAG bag
    is forced to re-parse; at least one import error must then mention the
    timetable error name.

    The Airflow Variable is restored to its previous state (or deleted if it
    did not exist) afterwards, so the invalid schedule does not break DAG
    parsing in other tests.
    """
    variable_name = dag_materialised_view_update_schedule_variable_name

    daily_materialised_view_dag: DAG = dag_bag.get_dag(dag_id=daily_materialised_views_dag_id)
    assert daily_materialised_view_dag is not None

    # Remember the current value so the shared Airflow DB can be put back as it was.
    previous_value = Variable.get(key=variable_name, default_var=None)
    Variable.set(key=variable_name, value=example_wrong_cron_table)
    try:
        # only_if_updated=False forces a re-parse although the DAG file is unchanged.
        dag_bag.collect_dags(only_if_updated=False)

        assert any(airflow_timetable_import_error_name in error
                   for error in dag_bag.import_errors.values())
    finally:
        if previous_value is None:
            Variable.delete(key=variable_name)
        else:
            Variable.set(key=variable_name, value=previous_value)

0 commit comments

Comments
 (0)