Skip to content

Commit 7bbf41d

Browse files
Merge branch 'main' into feature/TED-607
2 parents 332a69b + 587c515 commit 7bbf41d

38 files changed

Lines changed: 1177 additions & 163 deletions

dags/worker_single_notice_process_orchestrator.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,21 @@
1111
from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryMongoDB
1212
from ted_sws.data_manager.adapters.notice_repository import NoticeRepository
1313
from ted_sws.data_sampler.services.notice_xml_indexer import index_notice
14+
from ted_sws.data_sampler.services.notice_xml_indexer import index_notice_by_id
15+
from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice_by_id
16+
17+
from airflow.decorators import dag
18+
from airflow.operators.python import get_current_context, BranchPythonOperator, PythonOperator
19+
20+
from dags import DEFAULT_DAG_ARGUMENTS
21+
from ted_sws.notice_metadata_processor.services.notice_eligibility import notice_eligibility_checker_by_id
22+
from ted_sws.notice_packager.services.notice_packager import create_notice_package
23+
from ted_sws.notice_transformer.adapters.rml_mapper import RMLMapper
24+
from ted_sws.notice_transformer.services.notice_transformer import transform_notice_by_id
25+
from ted_sws.notice_validator.services.shacl_test_suite_runner import validate_notice_by_id_with_shacl_suite
26+
from ted_sws.notice_validator.services.sparql_test_suite_runner import validate_notice_by_id_with_sparql_suite
27+
from ted_sws.notice_validator.services.xpath_coverage_runner import validate_xpath_coverage_notice_by_id
28+
from ted_sws.event_manager.adapters.event_logger import EventLogger
1429
from ted_sws.event_manager.adapters.event_log_decorator import event_log
1530
from ted_sws.event_manager.adapters.event_logger import EventLogger
1631
from ted_sws.event_manager.model.event_message import NoticeEventMessage, EventMessageProcessType, EventMessageMetadata, \
@@ -190,12 +205,23 @@ def _validate_transformed_rdf_manifestation(**context_args):
190205
event_message: NoticeEventMessage = NoticeEventMessage()
191206
event_message.start_record()
192207

193-
notice = pull_dag_upstream(NOTICE_OBJECT)
194-
mapping_suite = pull_dag_upstream(MAPPING_SUITE_OBJECT)
195-
validate_notice_with_sparql_suite(notice=notice, mapping_suite_package=mapping_suite)
196-
validate_notice_with_shacl_suite(notice=notice, mapping_suite_package=mapping_suite)
197-
notice_id = notice.ted_id
198-
push_dag_downstream(NOTICE_OBJECT, notice)
208+
notice_id = pull_dag_upstream(NOTICE_ID)
209+
mapping_suite_id = pull_dag_upstream(MAPPING_SUITE_ID)
210+
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
211+
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
212+
mapping_suite_repository = MappingSuiteRepositoryMongoDB(mongodb_client=mongodb_client)
213+
validate_notice_by_id_with_sparql_suite(notice_id=notice_id, mapping_suite_identifier=mapping_suite_id,
214+
notice_repository=notice_repository,
215+
mapping_suite_repository=mapping_suite_repository)
216+
validate_notice_by_id_with_shacl_suite(notice_id=notice_id, mapping_suite_identifier=mapping_suite_id,
217+
notice_repository=notice_repository,
218+
mapping_suite_repository=mapping_suite_repository)
219+
validate_xpath_coverage_notice_by_id(notice_id=notice_id, mapping_suite_identifier=mapping_suite_id,
220+
mapping_suite_repository=mapping_suite_repository,
221+
mongodb_client=mongodb_client)
222+
push_dag_downstream(NOTICE_ID, notice_id)
223+
push_dag_downstream(MAPPING_SUITE_ID, mapping_suite_id)
224+
199225
context = get_current_context()
200226

201227
handle_event_message_metadata_dag_context(event_message, context)

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ pyshacl~=0.19.0
2020
agraph-python==101.0.10
2121
decorator~=5.1.1
2222
urllib3[secure]
23-
semantic-version==2.10.0
23+
semantic-version==2.10.0

ted_sws/core/model/manifestation.py

Lines changed: 125 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
""" """
99
from datetime import datetime
1010
from enum import Enum
11-
from typing import List, Union, Optional
11+
from typing import List, Union, Optional, Dict
1212

1313
from pydantic import Field
1414

@@ -42,10 +42,66 @@ def __str__(self):
4242
return f"/{str(content)[:STR_LEN]}" + ("..." if len(content) > STR_LEN else "") + "/"
4343

4444

45+
class ValidationManifestation(Manifestation):
    """
        The validation report.

        Common base for validation reports; it only adds the `created` timestamp.
    """
    # ISO-8601 creation time of this report instance.
    # A default_factory is used on purpose: a plain `= datetime.now().isoformat()` default is
    # evaluated a single time, when the class body is executed at import, so every report
    # would otherwise carry the module-load time instead of its own creation time.
    created: str = Field(default_factory=lambda: datetime.now().isoformat())
50+
51+
52+
class XMLValidationManifestation(ValidationManifestation):
    """
        Validation report attached to an XML manifestation.

        Extends the generic validation report with the identifier of the mapping suite
        the report refers to.
    """
    # Required: identifier of the mapping suite this report was produced for.
    mapping_suite_identifier: str
57+
58+
59+
class XPATHCoverageValidationAssertion(PropertyBaseModel):
    """
        A single XPath entry of an XPath coverage validation result.

        Every field is optional.
    """
    # Optional title for the entry.
    title: Optional[str]
    # The XPath expression the entry is about.
    xpath: Optional[str]
    # Integer count for this XPath.
    count: Optional[int]
    # String -> int mapping; presumably hits per notice id -- TODO confirm with the producer.
    notice_hit: Optional[Dict[str, int]]
    # Boolean outcome recorded for this XPath.
    query_result: Optional[bool]
68+
69+
70+
class XPATHCoverageValidationResult(PropertyBaseModel):
    """
        Payload of an XPath coverage validation.

        The list fields default to empty lists; the two coverage ratios have no default value.
    """
    # Identifiers of the notices (note: a list, despite the singular name).
    notice_id: Optional[List[str]] = []
    # Per-XPath assertion entries.
    xpath_assertions: Optional[List[XPATHCoverageValidationAssertion]] = []
    # XPaths reported as covered / not covered / extra.
    xpath_covered: Optional[List[str]] = []
    xpath_not_covered: Optional[List[str]] = []
    xpath_extra: Optional[List[str]] = []
    # Coverage figures expressed as floats.
    coverage: Optional[float]
    conceptual_coverage: Optional[float]
81+
82+
83+
class XPATHCoverageValidationReport(XMLValidationManifestation):
    """
        This is the model structure for Notice(s) XPATHs Coverage Report.

        It is an XML validation manifestation carrying an optional coverage result.
    """

    # The computed coverage result, when available.
    validation_result: Optional[XPATHCoverageValidationResult]
89+
90+
4591
class XMLManifestation(Manifestation):
    """
        Original XML Notice manifestation as published on the TED website.

        It can carry one XPath coverage validation report, set through `add_validation`.
    """
    # XPath coverage report for this manifestation; stays None until one is added.
    xpath_coverage_validation: Optional[XPATHCoverageValidationReport] = None

    def add_validation(self, validation: Union[XPATHCoverageValidationReport]):
        """
            Store the given validation report on this manifestation.

            Only XPath coverage reports (including subclasses) are stored, replacing any
            previously stored report; a value of any other type is ignored.
        :param validation: the validation report to attach
        :return:
        """
        # isinstance (rather than an exact type comparison) also accepts subclasses of the report.
        if isinstance(validation, XPATHCoverageValidationReport):
            self.xpath_coverage_validation = validation

    def is_validated(self) -> bool:
        """
            Tell whether an XPath coverage validation report is present.
        :return: True when `xpath_coverage_validation` is set (truthy), False otherwise
        """
        return bool(self.xpath_coverage_validation)
49105

50106

51107
class METSManifestation(Manifestation):
@@ -54,13 +110,13 @@ class METSManifestation(Manifestation):
54110
"""
55111

56112

57-
class RDFValidationManifestation(Manifestation):
113+
class RDFValidationManifestation(ValidationManifestation):
58114
"""
59-
The validation report
115+
The RDF validation report
116+
60117
"""
61-
created: str = datetime.now().isoformat()
62-
test_suite_identifier: str
63118
mapping_suite_identifier: str
119+
test_suite_identifier: Optional[str]
64120

65121

66122
class SPARQLQuery(PropertyBaseModel):
@@ -128,3 +184,67 @@ def is_validated(self) -> bool:
128184
if len(self.shacl_validations) and len(self.sparql_validations):
129185
return True
130186
return False
187+
188+
189+
class XPATHCoverageSummaryResult(PropertyBaseModel):
    """
        Integer totals of an XPath coverage summary; both counters default to 0.
    """
    xpath_covered: Optional[int] = 0
    xpath_not_covered: Optional[int] = 0
192+
193+
194+
class XPATHCoverageSummaryReport(PropertyBaseModel):
    """
        XPath coverage summary for one mapping suite.
    """
    # Identifier of the mapping suite the summary refers to (no default value).
    mapping_suite_identifier: Optional[str]
    # Covered / not covered totals; defaults to a zeroed result.
    validation_result: Optional[XPATHCoverageSummaryResult] = XPATHCoverageSummaryResult()
197+
198+
199+
class XMLManifestationValidationSummaryReport(PropertyBaseModel):
    """
        Validation summary of the XML manifestation; it holds the XPath coverage summary.
    """
    xpath_coverage_summary: Optional[XPATHCoverageSummaryReport] = XPATHCoverageSummaryReport()
201+
202+
203+
class SPARQLSummaryCountReport(PropertyBaseModel):
    """
        Three integer counters (success, fail, error) of a SPARQL summary; each defaults to 0.
    """
    success: Optional[int] = 0
    fail: Optional[int] = 0
    error: Optional[int] = 0
207+
208+
209+
class SPARQLSummaryResult(PropertyBaseModel):
    """
        SPARQL summary entry identified by a test suite and a mapping suite.
    """
    # Identifiers of the test suite and mapping suite the counts belong to (no default values).
    test_suite_identifier: Optional[str]
    mapping_suite_identifier: Optional[str]
    # Success / fail / error counters; defaults to a zeroed count report.
    aggregate: Optional[SPARQLSummaryCountReport] = SPARQLSummaryCountReport()
213+
214+
215+
class SPARQLSummaryReport(PropertyBaseModel):
    """
        SPARQL validation summary: a list of summary entries plus one aggregate count report.
    """
    # Summary entries; empty list by default.
    validation_results: Optional[List[SPARQLSummaryResult]] = []
    # Aggregate counters; defaults to a zeroed count report.
    aggregate: Optional[SPARQLSummaryCountReport] = SPARQLSummaryCountReport()
218+
219+
220+
class SHACLSummarySeverityCountReport(PropertyBaseModel):
    """
        Three integer counters (info, warning, violation) of a SHACL summary; each defaults to 0.
    """
    info: Optional[int] = 0
    warning: Optional[int] = 0
    violation: Optional[int] = 0
224+
225+
226+
class SHACLSummaryResultSeverityReport(PropertyBaseModel):
    """
        Wrapper around the SHACL severity counters, exposed under `aggregate`.
    """
    aggregate: Optional[SHACLSummarySeverityCountReport] = SHACLSummarySeverityCountReport()
228+
229+
230+
class SHACLSummaryResult(PropertyBaseModel):
    """
        SHACL summary entry identified by a test suite and a mapping suite.
    """
    # Identifiers of the test suite and mapping suite the counts belong to (no default values).
    test_suite_identifier: Optional[str]
    mapping_suite_identifier: Optional[str]
    # Severity counters; defaults to a zeroed severity report.
    result_severity: Optional[SHACLSummaryResultSeverityReport] = SHACLSummaryResultSeverityReport()
234+
235+
236+
class SHACLSummaryReport(PropertyBaseModel):
    """
        SHACL validation summary: a list of summary entries plus one severity report.
    """
    # Summary entries; empty list by default.
    validation_results: Optional[List[SHACLSummaryResult]] = []
    # Severity counters for the report as a whole; defaults to a zeroed severity report.
    result_severity: Optional[SHACLSummaryResultSeverityReport] = SHACLSummaryResultSeverityReport()
239+
240+
241+
class RDFManifestationValidationSummaryReport(PropertyBaseModel):
    """
        Validation summary of an RDF manifestation: one SPARQL summary and one SHACL summary.
    """
    sparql_summary: Optional[SPARQLSummaryReport] = SPARQLSummaryReport()
    shacl_summary: Optional[SHACLSummaryReport] = SHACLSummaryReport()
244+
245+
246+
class ValidationSummaryReport(ValidationManifestation):
    """
        Validation summary grouping the summaries of the XML, RDF and distilled RDF manifestations.

        Each section defaults to an empty summary of the corresponding type.
    """
    # Summary of the XML manifestation validation.
    xml_manifestation: Optional[XMLManifestationValidationSummaryReport] = XMLManifestationValidationSummaryReport()
    # Summary of the RDF manifestation validation.
    rdf_manifestation: Optional[RDFManifestationValidationSummaryReport] = RDFManifestationValidationSummaryReport()
    # Summary of the distilled RDF manifestation validation (same model as the RDF one).
    distilled_rdf_manifestation: Optional[
        RDFManifestationValidationSummaryReport] = RDFManifestationValidationSummaryReport()

ted_sws/core/model/notice.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222

2323
from ted_sws.core.model import PropertyBaseModel
2424
from ted_sws.core.model.manifestation import METSManifestation, RDFManifestation, XMLManifestation, \
25-
RDFValidationManifestation, SPARQLTestSuiteValidationReport, SHACLTestSuiteValidationReport
25+
RDFValidationManifestation, SPARQLTestSuiteValidationReport, SHACLTestSuiteValidationReport, \
26+
XPATHCoverageValidationReport, XMLValidationManifestation, ValidationSummaryReport
2627
from ted_sws.core.model.metadata import TEDMetadata, NormalisedMetadata, XMLMetadata
2728

2829

@@ -69,7 +70,7 @@ def __str__(self):
6970

7071
# possible downstream transitions
7172
NOTICE_STATUS_DOWNSTREAM_TRANSITION = {NoticeStatus.RAW: [NoticeStatus.INDEXED],
72-
NoticeStatus.INDEXED : [NoticeStatus.NORMALISED_METADATA],
73+
NoticeStatus.INDEXED: [NoticeStatus.NORMALISED_METADATA],
7374
NoticeStatus.NORMALISED_METADATA: [NoticeStatus.INELIGIBLE_FOR_TRANSFORMATION,
7475
NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION],
7576
NoticeStatus.INELIGIBLE_FOR_TRANSFORMATION: [
@@ -162,6 +163,7 @@ class Notice(WorkExpression):
162163
_rdf_manifestation: Optional[RDFManifestation] = None
163164
_mets_manifestation: Optional[METSManifestation] = None
164165
xml_metadata: Optional[XMLMetadata] = None
166+
validation_summary: Optional[ValidationSummaryReport] = None
165167

166168
@property
167169
def preprocessed_xml_manifestation(self) -> XMLManifestation:
@@ -201,9 +203,13 @@ def get_distilled_rdf_validation(self) -> Optional[List[RDFValidationManifestati
201203
result.append(shacl_validation)
202204
for sparql_validation in self.distilled_rdf_manifestation.sparql_validations:
203205
result.append(sparql_validation)
204-
205206
return result
206207

208+
def get_xml_validation(self) -> Optional[List[XMLValidationManifestation]]:
    """
        Collect the validation reports attached to the XML manifestation.
    :return: a list holding the XPath coverage report when one is set, otherwise an empty list
    """
    xpath_coverage_report = self.xml_manifestation.xpath_coverage_validation
    return [xpath_coverage_report] if xpath_coverage_report else []
207213

208214
def set_xml_metadata(self, xml_metadata: XMLMetadata):
209215
"""
@@ -269,13 +275,14 @@ def _check_status_is_validated(self) -> bool:
269275
270276
:return:
271277
"""
272-
if self._rdf_manifestation and self._distilled_rdf_manifestation:
273-
if self._rdf_manifestation.is_validated() and self._distilled_rdf_manifestation.is_validated():
278+
if self._rdf_manifestation and self._distilled_rdf_manifestation and self.xml_manifestation:
279+
if self._rdf_manifestation.is_validated() and self._distilled_rdf_manifestation.is_validated() \
280+
and self.xml_manifestation.is_validated():
274281
return True
275282
return False
276283

277-
def set_rdf_validation(self,
278-
rdf_validation: Union[SPARQLTestSuiteValidationReport, SHACLTestSuiteValidationReport]):
284+
def set_rdf_validation(self, rdf_validation: Union[SPARQLTestSuiteValidationReport,
285+
SHACLTestSuiteValidationReport]):
279286
"""
280287
Add an RDF validation result to the notice.
281288
If METS package data are available, erase them and reset the state.
@@ -290,8 +297,8 @@ def set_rdf_validation(self,
290297
if self._check_status_is_validated():
291298
self.update_status_to(NoticeStatus.VALIDATED)
292299

293-
def set_distilled_rdf_validation(self, rdf_validation: Union[
294-
SPARQLTestSuiteValidationReport, SHACLTestSuiteValidationReport]):
300+
def set_distilled_rdf_validation(self, rdf_validation: Union[SPARQLTestSuiteValidationReport,
301+
SHACLTestSuiteValidationReport]):
295302
"""
296303
297304
:param rdf_validation:
@@ -305,6 +312,16 @@ def set_distilled_rdf_validation(self, rdf_validation: Union[
305312
if self._check_status_is_validated():
306313
self.update_status_to(NoticeStatus.VALIDATED)
307314

315+
def set_xml_validation(self, xml_validation: Union[XPATHCoverageValidationReport]):
    """
        Attach an XML validation report to the XML manifestation of the notice.

        Once the report is added, the notice status is moved to VALIDATED if
        `_check_status_is_validated()` reports that the notice is validated.
    :param xml_validation: the XML validation report to add
    :return:
    """
    self.xml_manifestation.add_validation(validation=xml_validation)
    if not self._check_status_is_validated():
        return
    self.update_status_to(NoticeStatus.VALIDATED)
324+
308325
def set_mets_manifestation(self, mets_manifestation: METSManifestation):
309326
"""
310327
Add a METS package manifestation to the notice.

ted_sws/core/model/transform.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,23 @@ class TransformationTestData(MappingSuiteComponent):
6565
test_data: List[FileResource]
6666

6767

68+
class ConceptualMappingXPATH(MappingSuiteComponent):
    """
        One XPath entry of a conceptual mapping: the XPath expression and its name.
    """
    xpath: str
    name: str
71+
72+
73+
class ConceptualMappingMetadata(MappingSuiteComponent):
    """
        Metadata of a conceptual mapping; currently only an optional base XPath.
    """
    base_xpath: Optional[str]
75+
76+
77+
class ConceptualMapping(MappingSuiteComponent):
    """
        Conceptual mapping of a mapping suite: its XPath entries plus optional metadata.
    """
    # XPath entries of the mapping; empty list by default.
    xpaths: List[ConceptualMappingXPATH] = []
    # Optional metadata block (no default value).
    metadata: Optional[ConceptualMappingMetadata]
83+
84+
6885
class MappingSuite(MappingSuiteComponent):
6986
"""
7087
@@ -81,3 +98,4 @@ class MappingSuite(MappingSuiteComponent):
8198
shacl_test_suites: List[SHACLTestSuite]
8299
sparql_test_suites: List[SPARQLTestSuite]
83100
transformation_test_data: TransformationTestData
101+
conceptual_mapping: Optional[ConceptualMapping]

ted_sws/data_manager/adapters/mapping_suite_repository.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88

99
from ted_sws import config
1010
from ted_sws.core.model.transform import MappingSuite, FileResource, TransformationRuleSet, SHACLTestSuite, \
11-
SPARQLTestSuite, MetadataConstraints, TransformationTestData
11+
SPARQLTestSuite, MetadataConstraints, TransformationTestData, ConceptualMapping
1212
from ted_sws.data_manager.adapters.repository_abc import MappingSuiteRepositoryABC
13+
from ted_sws.mapping_suite_processor.services.conceptual_mapping_reader import CONCEPTUAL_MAPPINGS_FILE_NAME, \
14+
mapping_suite_read_conceptual_mapping
1315

1416
METADATA_FILE_NAME = "metadata.json"
1517
TRANSFORM_PACKAGE_NAME = "transformation"
@@ -141,6 +143,12 @@ def _read_sparql_test_suites(self, package_path: pathlib.Path) -> List[SPARQLTes
141143
sparql_tests=self._read_file_resources(path=sparql_test_suite_path))
142144
for sparql_test_suite_path in sparql_test_suite_paths]
143145

146+
@classmethod
def _read_conceptual_mapping(cls, package_path: pathlib.Path) -> ConceptualMapping:
    """
        Read the conceptual mapping of a mapping suite package.

        The conceptual mappings file is looked up in the transformation folder of the package.
    :param package_path: path to the mapping suite package
    :return: whatever `mapping_suite_read_conceptual_mapping` returns for that file
    """
    conceptual_mappings_file_path = package_path / TRANSFORM_PACKAGE_NAME / CONCEPTUAL_MAPPINGS_FILE_NAME
    return mapping_suite_read_conceptual_mapping(conceptual_mappings_file_path)
151+
144152
def _write_package_metadata(self, mapping_suite: MappingSuite):
145153
"""
146154
This method creates the metadata of a package based on the metadata in the mapping_suite.
@@ -297,6 +305,7 @@ def _read_mapping_suite_package(self, mapping_suite_identifier: str) -> Optional
297305
package_metadata["shacl_test_suites"] = self._read_shacl_test_suites(package_path)
298306
package_metadata["sparql_test_suites"] = self._read_sparql_test_suites(package_path)
299307
package_metadata["transformation_test_data"] = self._read_test_data_package(package_path)
308+
package_metadata["conceptual_mapping"] = self._read_conceptual_mapping(package_path)
300309
return MappingSuite(**package_metadata)
301310
return None
302311

ted_sws/data_sampler/services/notice_xml_indexer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,9 @@ def get_unique_xpaths_covered_by_notices(notice_ids: List[str], mongodb_client:
175175
results = notice_repository.collection.aggregate([{"$match": {"ted_id": {"$in": notice_ids}}}], allowDiskUse=True)
176176
unique_xpaths = set()
177177
for result in results:
178-
unique_xpaths.update(result["xml_metadata"]["unique_xpaths"])
178+
xml_metadata = result["xml_metadata"]
179+
if xml_metadata is not None:
180+
unique_xpaths.update(result["xml_metadata"]["unique_xpaths"])
179181
return list(unique_xpaths)
180182

181183

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
11
CONCEPTUAL_MAPPINGS_METADATA_SHEET_NAME = "Metadata"
2+
CONCEPTUAL_MAPPINGS_RULES_SHEET_NAME = "Rules"
3+
RULES_E_FORM_BT_NAME = 'eForm BT Name (O)'
4+
RULES_FIELD_XPATH = 'Field XPath (M)'
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
from ted_sws.data_manager.adapters.mapping_suite_repository import TRANSFORM_PACKAGE_NAME
2+
from ted_sws.mapping_suite_processor.services.conceptual_mapping_reader import CONCEPTUAL_MAPPINGS_FILE_NAME
23

3-
CONCEPTUAL_MAPPINGS_FILE = '{mappings_path}/{mapping_suite_id}/' + TRANSFORM_PACKAGE_NAME + '/conceptual_mappings.xlsx'
4+
CONCEPTUAL_MAPPINGS_FILE_TEMPLATE = '{mappings_path}/{mapping_suite_id}/' + TRANSFORM_PACKAGE_NAME + '/' \
5+
+ CONCEPTUAL_MAPPINGS_FILE_NAME

0 commit comments

Comments
 (0)