Skip to content

Commit 3c50c50

Browse files
committed
Merge branch 'main' into feature/TED-4
2 parents e6db60c + 8a3dcea commit 3c50c50

103 files changed

Lines changed: 131945 additions & 769 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/unit-tests-hermes.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ jobs:
2727
- name: Get Saxon
2828
run: make init-saxon
2929

30-
- name: Start staging infra
31-
run: make start-project-services
30+
# - name: Start staging infra
31+
# run: make start-project-services
3232

3333
- name: Run unit tests
3434
run: make test-all

MANIFEST.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
recursive-include ted_sws *.rq *.json *.jinja2 *.csv
1+
recursive-include ted_sws *.rq *.json *.jinja2 *.csv *.rml.ttl

Makefile

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,20 +97,20 @@ build-airflow: guard-ENVIRONMENT create-env-airflow build-externals
9797
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate
9898

9999
start-airflow: build-externals
100-
@ echo -e "$(BUILD_PRINT)Starting Airflow servies $(END_BUILD_PRINT)"
100+
@ echo -e "$(BUILD_PRINT)Starting Airflow services $(END_BUILD_PRINT)"
101101
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow/docker-compose.yaml --env-file ${ENV_FILE} up -d
102102

103103
stop-airflow:
104-
@ echo -e "$(BUILD_PRINT)Stoping Airflow services $(END_BUILD_PRINT)"
104+
@ echo -e "$(BUILD_PRINT)Stopping Airflow services $(END_BUILD_PRINT)"
105105
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow/docker-compose.yaml --env-file ${ENV_FILE} down
106106

107107
# ------------------------
108108
start-allegro-graph: build-externals
109-
@ echo -e "$(BUILD_PRINT)Starting Allegro-Graph servies $(END_BUILD_PRINT)"
109+
@ echo -e "$(BUILD_PRINT)Starting Allegro-Graph services $(END_BUILD_PRINT)"
110110
@ docker-compose -p ${ENVIRONMENT} --file ./infra/allegro-graph/docker-compose.yml --env-file ${ENV_FILE} up -d
111111

112112
stop-allegro-graph:
113-
@ echo -e "$(BUILD_PRINT)Stoping Allegro-Graph services $(END_BUILD_PRINT)"
113+
@ echo -e "$(BUILD_PRINT)Stopping Allegro-Graph services $(END_BUILD_PRINT)"
114114
@ docker-compose -p ${ENVIRONMENT} --file ./infra/allegro-graph/docker-compose.yml --env-file ${ENV_FILE} down
115115

116116
# ------------------------
@@ -281,4 +281,27 @@ refresh-mapping-files:
281281
#
282282
#stop-silk-service:
283283
# @ echo -e "Stop silk service"
284-
# @ cd infra/silk/ && docker-compose down
284+
# @ cd infra/silk/ && docker-compose down
285+
286+
287+
#-----------------------------------------------------------------------------
288+
# API Service commands
289+
#-----------------------------------------------------------------------------
290+
build-all-apis: build-id_manager-api
291+
292+
start-all-apis: start-id_manager-api
293+
294+
stop-all-apis: stop-id_manager-api
295+
296+
build-id_manager-api:
297+
@ echo -e "$(BUILD_PRINT) Build id_manager API service $(END_BUILD_PRINT)"
298+
@ docker-compose -p common --file infra/api/docker-compose.yml --env-file ${ENV_FILE} build --no-cache --force-rm
299+
@ docker-compose -p common --file infra/api/docker-compose.yml --env-file ${ENV_FILE} up -d --force-recreate
300+
301+
start-id_manager-api:
302+
@ echo -e "$(BUILD_PRINT)Starting id_manager API service $(END_BUILD_PRINT)"
303+
@ docker-compose -p common --file infra/api/docker-compose.yml --env-file ${ENV_FILE} up -d
304+
305+
stop-id_manager-api:
306+
@ echo -e "$(BUILD_PRINT)Stopping id_manager API service $(END_BUILD_PRINT)"
307+
@ docker-compose -p common --file infra/api/docker-compose.yml --env-file ${ENV_FILE} down

README.md

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,7 @@ to get the Usage Help:
8585
```bash
8686
Usage: resources_injector [OPTIONS] [MAPPING_SUITE_ID]
8787

88-
Injects the requested resources from Conceptual Mappings into the
89-
MappingSuite
88+
Injects the requested resources from Conceptual Mappings into the MappingSuite
9089

9190
Options:
9291
-i, --opt-conceptual-mappings-file TEXT Use to overwrite default INPUT
@@ -96,6 +95,27 @@ Options:
9695
--help Show this message and exit.
9796
```
9897

98+
#### CMD: rml_modules_injector
99+
Injects the requested RML modules from Conceptual Mappings into the MappingSuite.
100+
101+
Use:
102+
```bash
103+
rml_modules_injector --help
104+
```
105+
to get the Usage Help:
106+
```bash
107+
Usage: rml_modules_injector [OPTIONS] [MAPPING_SUITE_ID]
108+
109+
Injects the requested RML modules from Conceptual Mappings into the MappingSuite
110+
111+
Options:
112+
-i, --opt-conceptual-mappings-file TEXT Use to overwrite default INPUT
113+
-o, --opt-output-folder TEXT Use to overwrite default OUTPUT
114+
-r, --opt-rml-modules-folder TEXT
115+
-m, --opt-mappings-folder TEXT
116+
--help Show this message and exit.
117+
```
118+
99119
#### CMD: metadata_generator
100120
Generates metadata.json file from Conceptual Mappings file data.
101121

@@ -189,6 +209,7 @@ By default, successively runs the following commands:
189209
```bash
190210
- normalisation_resource_generator
191211
- resources_injector
212+
- rml_modules_injector
192213
- metadata_generator
193214
- yarrrml2rml_converter
194215
- sparql_generator
@@ -207,7 +228,7 @@ Usage: mapping_suite_processor [OPTIONS] MAPPING_SUITE_ID
207228
yarrrml2rml_converter - sparql_generator
208229

209230
Options:
210-
-c, --opt-commands [normalisation_resource_generator|resources_injector|metadata_generator|yarrrml2rml_converter|sparql_generator]
231+
-c, --opt-commands [normalisation_resource_generator|resources_injector|rml_modules_injector|metadata_generator|yarrrml2rml_converter|sparql_generator]
211232
-m, --opt-mappings-folder TEXT
212233
--help Show this message and exit.
213234
```
@@ -259,7 +280,7 @@ Options:
259280
##### Start local API server
260281
To start the API server:
261282
```bash
262-
id-manager-api-start-server
283+
api-id_manager-start-server
263284
```
264285
Output:
265286
```bash
@@ -269,11 +290,11 @@ See http://localhost:8000/api/v1/docs for API usage.
269290
```
270291
Use:
271292
```bash
272-
api-start-server --help
293+
api-id_manager-start-server --help
273294
```
274295
to get the cli command Usage Help:
275296
```bash
276-
Usage: id-manager-api-start-server [OPTIONS]
297+
Usage: api-id_manager-start-server [OPTIONS]
277298

278299
Options:
279300
-h, --host TEXT

dags/__init__.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@
33
DEFAULT_DAG_ARGUMENTS = {
44
"owner": "airflow",
55
"depends_on_past": False,
6-
"start_date": datetime.now(),
6+
"start_date": datetime.now()-timedelta(minutes=2),
77
"email": ["info@meaningfy.ws"],
88
"email_on_failure": False,
99
"email_on_retry": False,
1010
"retries": 0,
1111
"retry_delay": timedelta(minutes=3600),
12-
"schedule_interval": "@once",
13-
"max_active_runs": 128,
14-
"concurrency": 128,
15-
"execution_timeout": timedelta(hours=24),
12+
"max_active_runs": 15,
13+
"concurrency": 15,
14+
"execution_timeout": timedelta(days=10),
1615
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from datetime import datetime
2+
3+
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
4+
from dateutil import rrule
5+
6+
from dags import DEFAULT_DAG_ARGUMENTS
7+
from airflow.decorators import dag, task
8+
from airflow.operators.python import get_current_context
9+
10+
from dags.fetch_notices_per_day_worker import DATE_WILD_CARD_KEY
11+
12+
START_DATE_KEY = "start_date"
13+
END_DATE_KEY = "end_date"
14+
15+
16+
def generate_daily_dates(start_date: str, end_date: str) -> list:
17+
"""
18+
Given a date range returns all daily dates in that range
19+
:param start_date:
20+
:param end_date:
21+
:return:
22+
"""
23+
return [dt.strftime('%Y%m%d')
24+
for dt in rrule.rrule(rrule.DAILY,
25+
dtstart=datetime.strptime(start_date, '%Y%m%d'),
26+
until=datetime.strptime(end_date, '%Y%m%d'))]
27+
28+
29+
def generate_wild_card_by_date(date: str) -> str:
30+
"""
31+
Method to build a wildcard_date from a date string
32+
:param date:
33+
:return:
34+
"""
35+
return f"{date}*"
36+
37+
38+
@dag(default_args=DEFAULT_DAG_ARGUMENTS, schedule_interval=None, tags=['master', 'fetch_notices_for_date_range'])
39+
def fetch_notices_for_date_range():
40+
@task
41+
def trigger_fetch_notices_workers_for_date_range():
42+
context = get_current_context()
43+
dag_conf = context["dag_run"].conf
44+
45+
if START_DATE_KEY not in dag_conf.keys():
46+
raise "Config key [start_date] is not present in dag context"
47+
if END_DATE_KEY not in dag_conf.keys():
48+
raise "Config key [end_date] is not present in dag context"
49+
50+
for generated_date in generate_daily_dates(dag_conf[START_DATE_KEY], dag_conf[END_DATE_KEY]):
51+
wildcard_date = generate_wild_card_by_date(date=generated_date)
52+
TriggerDagRunOperator(
53+
task_id=f'trigger_fetch_notices_per_day_worker_dag_{wildcard_date[:-1]}',
54+
trigger_dag_id="fetch_notices_per_day_worker",
55+
conf={DATE_WILD_CARD_KEY: wildcard_date}
56+
).execute(context=context)
57+
58+
trigger_fetch_notices_workers_for_date_range()
59+
60+
61+
dag = fetch_notices_for_date_range()
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import datetime
2+
import time
3+
from random import randint
4+
5+
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
6+
7+
from dags import DEFAULT_DAG_ARGUMENTS
8+
from airflow.decorators import dag, task
9+
from airflow.operators.python import get_current_context
10+
from pymongo import MongoClient
11+
12+
from dags.index_and_normalise_notice_worker import NOTICE_ID
13+
from ted_sws import config
14+
from ted_sws.core.model.notice import NoticeStatus
15+
from ted_sws.data_manager.adapters.notice_repository import NoticeRepository
16+
17+
from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI
18+
from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher
19+
20+
DATE_WILD_CARD_KEY = "date_wild_card"
21+
22+
23+
@dag(default_args=DEFAULT_DAG_ARGUMENTS, schedule_interval=None, tags=['worker', 'fetch_notices_per_day'])
24+
def fetch_notices_per_day_worker():
25+
@task
26+
def fetch_notices_and_trigger_index_and_normalise_notice_worker():
27+
context = get_current_context()
28+
dag_conf = context["dag_run"].conf
29+
30+
if DATE_WILD_CARD_KEY not in dag_conf.keys():
31+
raise "Config key [date] is not present in dag context"
32+
33+
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
34+
notice_ids = NoticeFetcher(notice_repository=NoticeRepository(mongodb_client=mongodb_client),
35+
ted_api_adapter=TedAPIAdapter(
36+
request_api=TedRequestAPI())).fetch_notices_by_date_wild_card(
37+
wildcard_date=dag_conf[DATE_WILD_CARD_KEY]) # "20220203*"
38+
for notice_id in notice_ids:
39+
restart_dag_operator = True
40+
while restart_dag_operator:
41+
restart_dag_operator = False
42+
try:
43+
time.sleep(randint(10, 500) / 1000)
44+
TriggerDagRunOperator(
45+
task_id=f'trigger_index_and_normalise_notice_worker_dag_{notice_id}',
46+
trigger_dag_id="index_and_normalise_notice_worker",
47+
trigger_run_id=notice_id,
48+
conf={NOTICE_ID: notice_id}
49+
).execute(context=context)
50+
except Exception as e:
51+
52+
restart_dag_operator = True
53+
print("trigger dag operator restarted !!!")
54+
print("EXCEPTION message: ", e)
55+
56+
fetch_notices_and_trigger_index_and_normalise_notice_worker()
57+
58+
59+
dag = fetch_notices_per_day_worker()
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from pymongo import MongoClient
2+
3+
from dags.dags_utils import pull_dag_upstream, push_dag_downstream
4+
from ted_sws import config
5+
6+
from ted_sws.data_manager.adapters.notice_repository import NoticeRepository
7+
from ted_sws.data_sampler.services.notice_xml_indexer import index_notice
8+
from ted_sws.metadata_normaliser.services.metadata_normalizer import normalise_notice_by_id
9+
10+
from airflow.decorators import dag, task
11+
from airflow.operators.python import get_current_context
12+
13+
from dags import DEFAULT_DAG_ARGUMENTS
14+
15+
NOTICE_ID = "notice_id"
16+
17+
18+
@dag(default_args=DEFAULT_DAG_ARGUMENTS,
19+
schedule_interval=None,
20+
tags=['worker', 'index_and_normalise_notice'])
21+
def index_and_normalise_notice_worker():
22+
"""
23+
24+
:return:
25+
"""
26+
27+
@task
28+
def index_notice_step():
29+
"""
30+
31+
:return:
32+
"""
33+
context = get_current_context()
34+
dag_params = context["dag_run"].conf
35+
notice_id = dag_params[NOTICE_ID]
36+
push_dag_downstream(key=NOTICE_ID, value=notice_id)
37+
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
38+
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
39+
notice = notice_repository.get(reference=notice_id)
40+
indexed_notice = index_notice(notice=notice)
41+
notice_repository.update(notice=indexed_notice)
42+
43+
@task
44+
def normalise_notice_metadata_step():
45+
"""
46+
47+
:return:
48+
"""
49+
notice_id = pull_dag_upstream(NOTICE_ID)
50+
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
51+
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
52+
normalised_notice = normalise_notice_by_id(notice_id=notice_id, notice_repository=notice_repository)
53+
notice_repository.update(notice=normalised_notice)
54+
55+
index_notice_step() >> normalise_notice_metadata_step()
56+
57+
58+
dag = index_and_normalise_notice_worker()

dags/load_mapping_suite_in_mongodb.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
FINISH_LOADING_MAPPING_SUITE_TASK_ID = "finish_loading_mapping_suite"
1919
CHECK_IF_LOAD_TEST_DATA_TASK_ID = "check_if_load_test_data"
2020

21-
@dag(default_args=DEFAULT_DAG_ARGUMENTS, tags=['fetch', 'mapping-suite', 'github'])
21+
22+
@dag(default_args=DEFAULT_DAG_ARGUMENTS,
23+
schedule_interval=None,
24+
tags=['fetch', 'mapping-suite', 'github'])
2225
def load_mapping_suite_in_mongodb():
2326
@task
2427
def fetch_mapping_suite_package_from_github_into_mongodb():

0 commit comments

Comments
 (0)