Skip to content

Commit bd7d19b

Browse files
committed
implement failing tests regarding mets package char encoding issue
1 parent 3b5d081 commit bd7d19b

4 files changed

Lines changed: 154 additions & 0 deletions

File tree

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
{
2+
"notice": {
3+
"id": "003545_2021",
4+
"public_number_document": "003545",
5+
"public_number_edition": "2021004"
6+
},
7+
"mets": {
8+
"languages": [
9+
"en"
10+
],
11+
"revision": "0",
12+
"type": "create",
13+
"profile": "http://publications.europa.eu/resource/mets/op-sip-profile_002",
14+
"createdate": "2023-03-09T18:28:54.804225",
15+
"document_id": "",
16+
"dmd_id": "dmd_2021_S_004_003545_0_001",
17+
"dmd_mdtype": "OTHER",
18+
"dmd_othermdtype": "INSTANCE",
19+
"dmd_href": "2021_S_004_003545_0.mets.xml.dmd.rdf",
20+
"tmd_id": "tmd_2021_S_004_003545_0_001",
21+
"tmd_href": "2021_S_004_003545_0.tmd.rdf",
22+
"tmd_mdtype": "OTHER",
23+
"tmd_othermdtype": "INSTANCE",
24+
"file_id": "file_2021_S_004_003545_0_001",
25+
"notice_file_href": "2021_S_004_003545_0.notice.rdf",
26+
"notice_file_mimetype": "application/rdf+xml",
27+
"notice_file_checksum": "00e2c0570f2d9f00c71c3d8009b8bec5a530167a01ebb473e67be5e97383cdc5",
28+
"notice_file_checksum_type": "SHA-256"
29+
},
30+
"work": {
31+
"identifier": "2021_S_004_003545",
32+
"oj_identifier": "JOS_2021_004_R_003545",
33+
"cdm_rdf_type": "procurement_public",
34+
"resource_type": "PROCUREMENT_NOTICE",
35+
"uri": "http://data.europa.eu/a4g/resource/2021/003545_2021",
36+
"do_not_index": "true",
37+
"date_document": "2021-01-07",
38+
"created_by_agent": "EURUN",
39+
"dataset_published_by_agent": "EURUN",
40+
"datetime_transmission": "2023-03-09T18:28:54.806241",
41+
"title": {
42+
"en": "Construction work & planning",
43+
"ro": "Lucrari de constructie <br /> si planificare"
44+
},
45+
"date_creation": "2023-03-09",
46+
"concept_type_dataset": "TEST_DATA",
47+
"dataset_version": "20230309-0",
48+
"dataset_keyword": [
49+
"eProcurement",
50+
"notice"
51+
],
52+
"dataset_has_frequency_publication_frequency": "OTHER",
53+
"procurement_public_issued_by_country": "CZ",
54+
"procurement_public_url_etendering": []
55+
},
56+
"expression": {
57+
"identifier": "2021_S_004_003545.MUL",
58+
"title": {
59+
"en": " eProcurement notice 2021_S_004_003545 "
60+
},
61+
"uses_language": "MUL"
62+
},
63+
"manifestation": {
64+
"identifier": "2021_S_004_003545.MUL.rdf",
65+
"type": "rdf_epo",
66+
"date_publication": "2021-01-07",
67+
"distribution_has_status_distribution_status": "COMPLETED",
68+
"distribution_has_media_type_concept_media_type": "RDF"
69+
}
70+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE rdf:RDF [
3+
<!ENTITY % cellarEntities PUBLIC
4+
"-//PO-RESOURCE//ENTITIES CELLAR cdm model 1.0//EN"
5+
"/home/metaconv/metaconv_components/components/common/data/cellar_uris.ent">
6+
%cellarEntities;
7+
]>
8+
<rdf:RDF xmlns:cdm="http://publications.europa.eu/ontology/cdm#"
9+
xmlns:dct="http://purl.org/dc/terms/"
10+
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
11+
12+
<cdm:work rdf:about="&resource;ted/2021_S_004_003545">
13+
14+
<rdf:type rdf:resource="http://publications.europa.eu/ontology/cdm#procurement_public"/>
15+
<cdm:work_id_document rdf:datatype="http://www.w3.org/2001/XMLSchema#string">ted:2021_S_004_003545</cdm:work_id_document>
16+
<cdm:work_id_document rdf:datatype="http://www.w3.org/2001/XMLSchema#string">oj:JOS_2021_004_R_003545</cdm:work_id_document>
17+
<cdm:work_has_resource-type rdf:resource="http://publications.europa.eu/resource/authority/resource-type/PROCUREMENT_NOTICE"/>
18+
<cdm:do_not_index rdf:datatype="http://www.w3.org/2001/XMLSchema#boolean">true</cdm:do_not_index>
19+
<cdm:work_date_document rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-01-07</cdm:work_date_document>
20+
<cdm:work_created_by_agent rdf:resource="&cellar-authority;corporate-body/EURUN"/>
21+
<cdm:procurement_public_number_document_in_official-journal rdf:datatype="http://www.w3.org/2001/XMLSchema#string">003545</cdm:procurement_public_number_document_in_official-journal>
22+
<cdm:procurement_public_number_edition rdf:datatype="http://www.w3.org/2001/XMLSchema#positiveInteger">2021004</cdm:procurement_public_number_edition>
23+
24+
<cdm:work_title xml:lang="en">Construction work & planning</cdm:work_title>
25+
26+
<cdm:work_title xml:lang="ro">Lucrari de constructie <br /> si planificare</cdm:work_title>
27+
28+
<cdm:datetime_transmission rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2023-03-09T18:28:54.806241</cdm:datetime_transmission>
29+
30+
31+
</cdm:work>
32+
33+
<cdm:expression rdf:about="&resource;ted/2021_S_004_003545.MUL">
34+
35+
<rdf:type rdf:resource="http://publications.europa.eu/ontology/cdm#expression_procurement_public"/>
36+
<cdm:expression_belongs_to_work rdf:resource="&resource;ted/2021_S_004_003545"/>
37+
38+
<cdm:expression_title xml:lang="en"> eProcurement notice 2021_S_004_003545 </cdm:expression_title>
39+
40+
<cdm:expression_uses_language rdf:resource="&cellar-authority;language/MUL"/>
41+
<cdm:expression_procurement_public_authority-type_name rdf:datatype="http://www.w3.org/2001/XMLSchema#string">Other</cdm:expression_procurement_public_authority-type_name>
42+
</cdm:expression>
43+
44+
<cdm:manifestation_distribution rdf:about="&resource;ted/2021_S_004_003545.MUL.rdf">
45+
46+
<cdm:manifestation_manifests_expression rdf:resource="&resource;ted/2021_S_004_003545.MUL"/>
47+
<cdm:manifestation_type rdf:datatype="http://www.w3.org/2001/XMLSchema#string">rdf_epo</cdm:manifestation_type>
48+
<cdm:manifestation_date_publication rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-01-07</cdm:manifestation_date_publication>
49+
<cdm:manifestation_distribution_has_status_distribution_status rdf:resource="http://publications.europa.eu/resource/authority/distribution-status/COMPLETED"/>
50+
<cdm:manifestation_distribution_has_media_type_concept_media_type rdf:resource="http://publications.europa.eu/resource/authority/file-type/RDF"/>
51+
</cdm:manifestation_distribution>
52+
</rdf:RDF>

tests/unit/notice_packager/conftest.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,21 @@ def template_sample_metadata_json() -> Dict:
3030
return json.load((TEST_DATA_PATH / "notice_packager" / "template_metadata.json").open())
3131

3232

33+
@pytest.fixture
def sample_metadata_with_wrong_title_json() -> Dict:
    """Load the packager metadata sample whose titles contain XML-unsafe text.

    The JSON file is decoded explicitly as UTF-8 and the handle is closed as
    soon as parsing finishes, so the fixture neither depends on the platform
    default encoding nor leaks an open file.

    :return: the parsed content of ``metadata_with_wrong_title.json``
    """
    metadata_path = TEST_DATA_PATH / "notice_packager" / "wrong_title" / "metadata_with_wrong_title.json"
    with metadata_path.open(encoding="utf-8") as metadata_file:
        return json.load(metadata_file)
36+
37+
38+
@pytest.fixture
def sample_mets_xml_dmd_rdf_with_wrong_title_str() -> str:
    """Return the raw text of the METS DMD RDF sample with unescaped titles.

    The sample declares ``encoding="UTF-8"`` in its XML prolog, so it is read
    as UTF-8 explicitly instead of relying on the platform default encoding.

    :return: the content of ``mets_with_wrong_title.mets.xml.dmd.rdf`` as a string
    """
    rdf_path = TEST_DATA_PATH / "notice_packager" / "wrong_title" / "mets_with_wrong_title.mets.xml.dmd.rdf"
    return rdf_path.read_text(encoding="utf-8")
41+
42+
43+
@pytest.fixture
def sample_metadata_with_wrong_title(sample_metadata_with_wrong_title_json) -> PackagerMetadata:
    """Build a ``PackagerMetadata`` instance from the wrong-title JSON sample.

    :param sample_metadata_with_wrong_title_json: mapping loaded from the sample JSON file
    :return: the metadata model created by passing the mapping's entries as keyword arguments
    """
    metadata_fields = sample_metadata_with_wrong_title_json
    packager_metadata = PackagerMetadata(**metadata_fields)
    return packager_metadata
46+
47+
3348
@pytest.fixture
def template_sample_metadata(template_sample_metadata_json) -> PackagerMetadata:
    """Build a ``PackagerMetadata`` instance from the template JSON sample.

    :param template_sample_metadata_json: mapping provided by the JSON fixture of the same name
    :return: the metadata model created by passing the mapping's entries as keyword arguments
    """
    metadata_fields = template_sample_metadata_json
    packager_metadata = PackagerMetadata(**metadata_fields)
    return packager_metadata
@@ -54,6 +69,7 @@ def template_sample_expression(template_sample_metadata) -> ExpressionMetadata:
5469
def template_sample_manifestation(template_sample_metadata) -> ManifestationMetadata:
5570
return template_sample_metadata.manifestation
5671

72+
5773
# template_metadata END
5874

5975

@@ -67,6 +83,7 @@ def notice_sample_metadata(notice_2018) -> NormalisedMetadata:
6783

6884
return normalised_metadata
6985

86+
7087
# notice_metadata END
7188

7289

tests/unit/notice_packager/test_template_generator.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@
77

88
""" """
99
import re
10+
from xml.etree import ElementTree
11+
from xml.etree.ElementTree import ParseError
1012

1113
import pytest
1214

1315
from ted_sws.notice_packager.adapters.template_generator import TemplateGenerator
16+
from ted_sws.notice_packager.model.metadata import PackagerMetadata
1417
from tests import TEST_DATA_PATH
1518

1619

@@ -57,3 +60,15 @@ def test_mets2action_mets_xml_generator_with_wrong_action(template_sample_metada
5760
template_sample_metadata.mets.type = "wrong_action"
5861
with pytest.raises(ValueError):
5962
TemplateGenerator.mets2action_mets_xml_generator(template_sample_metadata)
63+
64+
65+
def test_mets_dmd_rdf_has_html_safe_sequences_after_generation(sample_metadata_with_wrong_title: PackagerMetadata,
                                                               sample_mets_xml_dmd_rdf_with_wrong_title_str: str):
    """Check that the generated METS DMD RDF parses even when titles hold HTML sequences.

    The stored sample RDF is first confirmed to be rejected by the XML parser;
    the RDF produced by ``TemplateGenerator`` from the same metadata must then
    parse without raising.
    """
    # Precondition: the stored sample is not well-formed XML (HTML sequences or elements)
    with pytest.raises(ParseError):
        ElementTree.fromstring(sample_mets_xml_dmd_rdf_with_wrong_title_str)

    generated_rdf: str = TemplateGenerator.mets_xml_dmd_rdf_generator(sample_metadata_with_wrong_title)

    # Parsing raises ParseError (failing the test) unless the generated XML is well-formed
    ElementTree.fromstring(generated_rdf)

0 commit comments

Comments
 (0)