Skip to content

Commit 3c71f63

Browse files
Merge branch 'main' into feature/TED-593
2 parents b4463a4 + 08ad406 commit 3c71f63

44 files changed

Lines changed: 21557 additions & 155 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ create-env-airflow:
9393

9494
build-airflow: guard-ENVIRONMENT create-env-airflow build-externals
9595
@ echo -e "$(BUILD_PRINT) Build Airflow services $(END_BUILD_PRINT)"
96-
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow/docker-compose.yaml --env-file ${ENV_FILE} build --no-cache --force-rm
96+
@ docker build -t meaningfy/airflow ./infra/airflow/
9797
@ docker-compose -p ${ENVIRONMENT} --file ./infra/airflow/docker-compose.yaml --env-file ${ENV_FILE} up -d --force-recreate
9898

9999
start-airflow: build-externals

dags/selector_daily_fetch_orchestrator.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import datetime
1+
from datetime import datetime, timedelta
22
from typing import List
33

44
from airflow.decorators import dag, task
@@ -40,13 +40,15 @@ def fetch_notice_from_ted():
4040
if WILD_CARD_PARAM in dag_params.keys():
4141
current_datetime_wildcard = dag_params[WILD_CARD_PARAM]
4242
else:
43-
current_datetime_wildcard = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y%m%d*")
43+
current_datetime_wildcard = (datetime.now() - timedelta(days=1)).strftime("%Y%m%d*")
44+
notice_publication_date = datetime.strptime(current_datetime_wildcard, "%Y%m%d*").date()
4445
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
4546
notice_ids = NoticeFetcher(notice_repository=NoticeRepository(mongodb_client=mongodb_client),
4647
ted_api_adapter=TedAPIAdapter(
4748
request_api=TedRequestAPI())).fetch_notices_by_date_wild_card(
4849
wildcard_date=current_datetime_wildcard)
49-
create_and_store_in_mongo_db_daily_supra_notice(notice_ids=notice_ids, mongodb_client=mongodb_client)
50+
create_and_store_in_mongo_db_daily_supra_notice(notice_ids=notice_ids, mongodb_client=mongodb_client,
51+
notice_publication_date=notice_publication_date)
5052
return notice_ids
5153

5254
@task

dags/worker_single_notice_process_orchestrator.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,11 @@ def _normalise_notice_metadata(**context_args):
7575

7676
notice: Notice = pull_dag_upstream(NOTICE_OBJECT)
7777
normalised_notice = normalise_notice(notice=notice)
78-
push_dag_downstream(NOTICE_OBJECT, normalised_notice)
78+
79+
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
80+
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
81+
notice_repository.update(notice=normalised_notice)
82+
push_dag_downstream(NOTICE_ID, notice.ted_id)
7983

8084
context = get_current_context()
8185

@@ -92,14 +96,14 @@ def _check_eligibility_for_transformation(**context_args):
9296
event_message: NoticeEventMessage = NoticeEventMessage()
9397
event_message.start_record()
9498

95-
notice = pull_dag_upstream(NOTICE_OBJECT)
99+
notice_id = pull_dag_upstream(NOTICE_ID)
96100
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
97101
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
102+
notice = notice_repository.get(reference=notice_id)
98103
mapping_suite_repository = MappingSuiteRepositoryMongoDB(mongodb_client=mongodb_client)
99104
result = notice_eligibility_checker(notice=notice, mapping_suite_repository=mapping_suite_repository)
100105
notice_repository.update(notice=notice)
101106
mapping_suite_id = None
102-
notice_id = notice.ted_id
103107
if result:
104108
notice_id, mapping_suite_id = result
105109
push_dag_downstream(MAPPING_SUITE_ID, mapping_suite_id)
@@ -441,6 +445,7 @@ def _check_notice_state_before_notice_successfully_processed():
441445
state_skip_table = {
442446
NoticeStatus.RAW: "index_notice_xml_content",
443447
NoticeStatus.INDEXED: "index_notice_xml_content",
448+
NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION: "check_eligibility_for_transformation",
444449
NoticeStatus.NORMALISED_METADATA: "check_eligibility_for_transformation",
445450
NoticeStatus.ELIGIBLE_FOR_PACKAGING: "check_notice_state_before_generate_mets_package",
446451
NoticeStatus.ELIGIBLE_FOR_PUBLISHING: "check_notice_state_before_publish_notice_in_cellar",

infra/airflow/docker-compose.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,8 @@ x-airflow-common:
4545
# Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml
4646
# and uncomment the "build" line below, Then run `docker-compose build` to build the images.
4747
#image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.2.2-python3.8}
48-
build: .
48+
#build: .
49+
image: meaningfy/airflow:latest
4950
env_file:
5051
- ../../.env
5152
environment:

setup.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ def open_local(paths, mode="r", encoding="utf8"):
7878
"sparql_runner = ted_sws.notice_validator.entrypoints.cli.cmd_sparql_runner:main",
7979
"shacl_runner = ted_sws.notice_validator.entrypoints.cli.cmd_shacl_runner:main",
8080
"xpath_coverage_runner = ted_sws.notice_validator.entrypoints.cli.cmd_xpath_coverage_runner:main",
81+
"validation_summary_runner = ted_sws.notice_validator.entrypoints.cli.cmd_validation_summary_runner:main",
8182
"rml_report_generator = ted_sws.rml_to_html.entrypoints.cli.cmd_rml_report_generator:main",
8283
"mapping_suite_processor = ted_sws.mapping_suite_processor.entrypoints.cli.cmd_mapping_suite_processor:main",
8384
"metadata_generator = ted_sws.mapping_suite_processor.entrypoints.cli.cmd_metadata_generator:main",

ted_sws/core/model/manifestation.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,8 @@ class XPATHCoverageValidationAssertion(PropertyBaseModel):
6060
"""
6161
6262
"""
63+
standard_form_field_id: Optional[str]
64+
eform_bt_id: Optional[str]
6365
title: Optional[str]
6466
xpath: Optional[str]
6567
count: Optional[int]

ted_sws/core/model/supra_notice.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88
""" This module implements aggregates over groups of notices and the appropriate business needs, on those groups """
99
import abc
10-
from datetime import datetime, time
10+
from datetime import datetime, date
1111
from typing import List, Optional
1212

1313
from ted_sws.core.model import PropertyBaseModel
@@ -45,5 +45,7 @@ class DailySupraNotice(SupraNotice):
4545
"""
4646
This is an aggregate over the notices published in TED in a specific day.
4747
"""
48-
notice_publication_day: datetime = datetime.combine(datetime.today(), time())
48+
notice_publication_date: date
4949
validation_report: Optional[SupraNoticeValidationReport]
50+
51+

ted_sws/core/model/transform.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,9 @@ class TransformationTestData(MappingSuiteComponent):
6767

6868
class ConceptualMappingXPATH(MappingSuiteComponent):
6969
xpath: str
70-
name: str
70+
name: Optional[str]
71+
standard_form_field_id: Optional[str]
72+
eform_bt_id: Optional[str]
7173

7274

7375
class ConceptualMappingMetadata(MappingSuiteComponent):

ted_sws/data_manager/adapters/supra_notice_repository.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from datetime import datetime, time
12
from typing import Optional, Iterator
23

34
from pymongo import MongoClient, ASCENDING
@@ -6,7 +7,7 @@
67
from ted_sws.core.model.supra_notice import DailySupraNotice
78
from ted_sws.data_manager.adapters.repository_abc import DailySupraNoticeRepositoryABC
89

9-
DAILY_SUPRA_NOTICE_ID = "notice_publication_day"
10+
DAILY_SUPRA_NOTICE_ID = "notice_publication_date"
1011

1112

1213
class DailySupraNoticeRepository(DailySupraNoticeRepositoryABC):
@@ -26,6 +27,8 @@ def __init__(self, mongodb_client: MongoClient, database_name: str = _database_n
2627

2728
def _update_daily_supra_notice(self, daily_supra_notice: DailySupraNotice, upsert: bool = False):
2829
daily_supra_notice_dict = daily_supra_notice.dict()
30+
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID] = datetime.combine(
31+
daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID], time())
2932
self.collection.update_one({DAILY_SUPRA_NOTICE_ID: daily_supra_notice_dict[DAILY_SUPRA_NOTICE_ID]},
3033
{"$set": daily_supra_notice_dict}, upsert=upsert)
3134

@@ -43,19 +46,18 @@ def update(self, daily_supra_notice: DailySupraNotice):
4346
:param daily_supra_notice:
4447
:return:
4548
"""
46-
daily_supra_notice_exist = self.collection.find_one(
47-
{DAILY_SUPRA_NOTICE_ID: daily_supra_notice.notice_publication_day})
48-
if daily_supra_notice_exist is not None:
49-
self._update_daily_supra_notice(daily_supra_notice=daily_supra_notice)
49+
self._update_daily_supra_notice(daily_supra_notice=daily_supra_notice)
5050

5151
def get(self, reference) -> Optional[DailySupraNotice]:
5252
"""
5353
This method allows a daily_supra_notice to be obtained based on an identification reference.
5454
:param reference:
5555
:return: DailySupraNotice
5656
"""
57+
reference = datetime.combine(reference, time())
5758
result_dict = self.collection.find_one({DAILY_SUPRA_NOTICE_ID: reference})
5859
if result_dict is not None:
60+
result_dict[DAILY_SUPRA_NOTICE_ID] = result_dict[DAILY_SUPRA_NOTICE_ID].date()
5961
daily_supra_notice = DailySupraNotice.parse_obj(result_dict)
6062
return daily_supra_notice
6163
return None
Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
CONCEPTUAL_MAPPINGS_METADATA_SHEET_NAME = "Metadata"
22
CONCEPTUAL_MAPPINGS_RULES_SHEET_NAME = "Rules"
33
RULES_E_FORM_BT_NAME = 'eForm BT Name (O)'
4-
RULES_FIELD_XPATH = 'Field XPath (M)'
4+
RULES_FIELD_XPATH = 'Field XPath (M)'
5+
RULES_SF_FIELD_ID = 'Standard Form Field ID (M)'
6+
RULES_E_FORM_BT_ID = 'eForm BT-ID (O)'

0 commit comments

Comments
 (0)