Skip to content

Commit 526ad50

Browse files
author
Kolea Plesco
committed
Merge remote-tracking branch 'origin/main' into feature/TED-677
2 parents 0d52116 + b8af182 commit 526ad50

31 files changed

Lines changed: 4563 additions & 666 deletions

dags/notice_validation_workflow.py

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,16 @@ def notice_daily_validation_workflow():
3333
))
3434
)
3535
def validate_fetched_notices():
36+
"""
37+
:return:
38+
"""
3639
from ted_sws import config
3740
from ted_sws.supra_notice_manager.services.supra_notice_validator import validate_and_update_daily_supra_notice
3841

3942
publication_date = get_notice_publication_date()
4043
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
4144
validate_and_update_daily_supra_notice(notice_publication_day=publication_date,
42-
mongodb_client=mongodb_client
43-
)
45+
mongodb_client=mongodb_client)
4446

4547
@task
4648
@event_log(TechnicalEventMessage(
@@ -50,15 +52,19 @@ def validate_fetched_notices():
5052
))
5153
)
5254
def summarize_validation_for_daily_supra_notice():
53-
from ted_sws import config
54-
from ted_sws.supra_notice_manager.services.supra_notice_validator import \
55-
summary_validation_for_daily_supra_notice
56-
57-
publication_date = get_notice_publication_date()
58-
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
59-
summary_validation_for_daily_supra_notice(notice_publication_day=publication_date,
60-
mongodb_client=mongodb_client
61-
)
55+
"""
56+
:return:
57+
"""
58+
# Temporarily disable DailySupraNotice validation summary
59+
# from ted_sws import config
60+
# from ted_sws.supra_notice_manager.services.supra_notice_validator import \
61+
# summary_validation_for_daily_supra_notice
62+
#
63+
# publication_date = get_notice_publication_date()
64+
# mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
65+
# summary_validation_for_daily_supra_notice(notice_publication_day=publication_date,
66+
# mongodb_client=mongodb_client
67+
# )
6268

6369
@task
6470
@event_log(TechnicalEventMessage(
@@ -69,23 +75,13 @@ def summarize_validation_for_daily_supra_notice():
6975
)
7076
def validate_availability_of_notice_in_cellar():
7177
from ted_sws import config
72-
from ted_sws.data_manager.adapters.notice_repository import NoticeRepository
73-
from ted_sws.data_manager.adapters.supra_notice_repository import DailySupraNoticeRepository
74-
from ted_sws.notice_validator.services.check_availability_of_notice_in_cellar import \
75-
validate_notice_availability_in_cellar
78+
from ted_sws.supra_notice_manager.services.supra_notice_validator import \
79+
validate_and_update_supra_notice_availability_in_cellar
7680

7781
notice_publication_day = get_notice_publication_date()
7882
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
79-
repo = DailySupraNoticeRepository(mongodb_client=mongodb_client)
80-
supra_notice = repo.get(reference=notice_publication_day)
81-
if supra_notice:
82-
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
83-
for notice_id in supra_notice.notice_ids:
84-
notice = notice_repository.get(reference=notice_id)
85-
old_notice_status = notice.status
86-
notice = validate_notice_availability_in_cellar(notice=notice)
87-
if notice.status != old_notice_status:
88-
notice_repository.update(notice=notice)
83+
validate_and_update_supra_notice_availability_in_cellar(notice_publication_day=notice_publication_day,
84+
mongodb_client=mongodb_client)
8985

9086
validate_fetched_notices() >> summarize_validation_for_daily_supra_notice() >> validate_availability_of_notice_in_cellar()
9187

dags/operators/DagBatchPipelineOperator.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
DEFAULT_NUBER_OF_CELERY_WORKERS = 144
2020
NOTICE_PROCESS_WORKFLOW_DAG_NAME = "notice_process_workflow"
2121
DEFAULT_START_WITH_TASK_ID = "notice_normalisation_pipeline"
22-
DEFAULT_PIPELINE_NAME_FOR_LOGS = "unknown_pipeline_name"
22+
DEFAULT_PIPELINE_NAME_FOR_LOGS = "unknown_pipeline_name"
23+
2324

2425
class BatchPipelineCallable(Protocol):
2526

dags/pipelines/notice_processor_pipelines.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,23 @@ def notice_validation_pipeline(notice: Notice, mongodb_client: MongoClient) -> N
4949
from ted_sws.notice_validator.services.validation_summary_runner import validation_summary_report_notice
5050
from ted_sws.notice_validator.services.xpath_coverage_runner import validate_xpath_coverage_notice
5151
from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryMongoDB
52+
from ted_sws.event_manager.services.log import log_notice_info
5253

5354
mapping_suite_id = notice.distilled_rdf_manifestation.mapping_suite_id
5455
mapping_suite_repository = MappingSuiteRepositoryMongoDB(mongodb_client=mongodb_client)
5556
mapping_suite = mapping_suite_repository.get(reference=mapping_suite_id)
57+
log_notice_info(message="Validation :: XPATH coverage :: START", notice_id=notice.ted_id)
5658
validate_xpath_coverage_notice(notice=notice, mapping_suite=mapping_suite, mongodb_client=mongodb_client)
59+
log_notice_info(message="Validation :: XPATH coverage :: END", notice_id=notice.ted_id)
60+
log_notice_info(message="Validation :: SPARQL :: START", notice_id=notice.ted_id)
5761
validate_notice_with_sparql_suite(notice=notice, mapping_suite_package=mapping_suite)
62+
log_notice_info(message="Validation :: SPARQL :: END", notice_id=notice.ted_id)
63+
log_notice_info(message="Validation :: SHACL :: START", notice_id=notice.ted_id)
5864
validate_notice_with_shacl_suite(notice=notice, mapping_suite_package=mapping_suite)
65+
log_notice_info(message="Validation :: SHACL :: END", notice_id=notice.ted_id)
66+
log_notice_info(message="Validation :: Summary :: START", notice_id=notice.ted_id)
5967
validation_summary_report_notice(notice=notice)
68+
log_notice_info(message="Validation :: Summary :: END", notice_id=notice.ted_id)
6069
return NoticePipelineOutput(notice=notice)
6170

6271

@@ -78,11 +87,13 @@ def notice_publish_pipeline(notice: Notice, mongodb_client: MongoClient) -> Noti
7887
"""
7988
from ted_sws.notice_publisher.services.notice_publisher import publish_notice, publish_notice_rdf_into_s3
8089
from ted_sws.event_manager.services.log import log_notice_error
90+
from ted_sws import config
8191

82-
published_into_s3 = publish_notice_rdf_into_s3(notice=notice)
83-
if not published_into_s3:
84-
log_notice_error(message="Can't load notice distilled rdf manifestation into S3 bucket!",
85-
notice_id=notice.ted_id)
92+
if config.S3_PUBLISH_ENABLED:
93+
published_into_s3 = publish_notice_rdf_into_s3(notice=notice)
94+
if not published_into_s3:
95+
log_notice_error(message="Can't load notice distilled rdf manifestation into S3 bucket!",
96+
notice_id=notice.ted_id)
8697
notice.set_is_eligible_for_publishing(eligibility=True)
8798
result = publish_notice(notice=notice)
8899
if result:

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ ordered-set~=4.0.2
2525
json2html~=1.3.0
2626
minio~=7.1.1
2727
certifi
28+
networkx~=2.8.8

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ def open_local(paths, mode="r", encoding="utf8"):
9595
"normalisation_resource_generator = ted_sws.data_manager.entrypoints.cli.cmd_generate_mapping_resources:main",
9696
"s3_rdf_publisher = ted_sws.notice_publisher.entrypoints.cli.cmd_s3_rdf_publisher:main",
9797
"bulk_packager = ted_sws.notice_packager.entrypoints.cli.cmd_bulk_packager:main",
98-
"api-digest_service-start-server = ted_sws.notice_transformer.entrypoints.api.digest_service.server:api_server_start"
98+
"api-digest_service-start-server = ted_sws.notice_transformer.entrypoints.api.digest_service.server:api_server_start",
99+
"rdf_component_detector = ted_sws.rdf_component_detector.entrypoints.cli.cmd_rdf_component_detector:main"
99100
],
100101
},
101102
include_package_data=True,

ted_sws/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def S3_PUBLISH_REGION(self) -> str:
238238

239239
@property
240240
def S3_PUBLISH_SSL_VERIFY(self) -> bool:
241-
return True if AirflowAndEnvConfigResolver().config_resolve() == "1" else False
241+
return AirflowAndEnvConfigResolver().config_resolve() == "1"
242242

243243
@property
244244
def S3_PUBLISH_NOTICE_BUCKET(self) -> str:
@@ -248,6 +248,10 @@ def S3_PUBLISH_NOTICE_BUCKET(self) -> str:
248248
def S3_PUBLISH_NOTICE_RDF_BUCKET(self) -> str:
249249
return AirflowAndEnvConfigResolver().config_resolve()
250250

251+
@property
252+
def S3_PUBLISH_ENABLED(self) -> bool:
253+
return AirflowAndEnvConfigResolver().config_resolve() == "1"
254+
251255

252256
class TedConfigResolver(MongoDBConfig, RMLMapperConfig, XMLProcessorConfig, ELKConfig, LoggingConfig,
253257
GitHubArtefacts, API, AllegroConfig, TedAPIConfig, SFTPConfig, FusekiConfig,

ted_sws/core/adapters/cmd_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
DEFAULT_MAPPINGS_PATH = 'mappings'
1919
DEFAULT_OUTPUT_PATH = 'output'
20-
EXIT_CODE_OK = os.EX_OK
20+
EXIT_CODE_OK = 0 # os.EX_OK
2121
DEFAULT_EXIT_CODE = EXIT_CODE_OK
2222

2323

ted_sws/core/model/supra_notice.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,10 @@ class SupraNoticeValidationReport(Manifestation):
3535
Result of checking whether all the notices published in TED are present in the internal database.
3636
"""
3737
missing_notice_ids: Optional[List[str]]
38+
not_published_notice_ids: Optional[List[str]]
3839

3940
def is_valid(self):
40-
if not self.missing_notice_ids:
41+
if not self.missing_notice_ids and not self.not_published_notice_ids:
4142
return True
4243
return False
4344

ted_sws/notice_validator/adapters/validation_summary_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def notice_shacl_summary(self, notice: Notice, report: RDFManifestationValidatio
106106
validation_results = shacl_report.validation_results
107107
is_new, result_validation = self.shacl_summary_result(shacl_report, result_counts)
108108
result_count: SHACLSummarySeverityCountReport = result_validation.result_severity.aggregate
109-
if validation_results:
109+
if validation_results and validation_results.results_dict:
110110
bindings = validation_results.results_dict['results']['bindings']
111111
for binding in bindings:
112112
result_severity = binding['resultSeverity']
File renamed without changes.

0 commit comments

Comments
 (0)