Skip to content

Commit c3bf4a6

Browse files
author
Kolea Plesco
committed
Bulk packager updates + tests
1 parent ddd427d commit c3bf4a6

9 files changed

Lines changed: 145 additions & 113 deletions

File tree

ted_sws/notice_packager/entrypoints/cli/cmd_bulk_packager.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
from ted_sws.core.model.manifestation import XMLManifestation
2222
from ted_sws.core.model.notice import Notice
2323
from ted_sws.data_manager.adapters.notice_repository import NoticeRepository
24+
from ted_sws.event_manager.adapters.log import LOG_WARN_TEXT
2425
from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import \
2526
XMLManifestationMetadataExtractor
2627
from ted_sws.notice_packager import DEFAULT_NOTICE_PACKAGE_EXTENSION
@@ -43,7 +44,9 @@ def __init__(self, rdf_files_folder, output_folder, pkgs_count: int, notice_ids:
4344
mongodb_client=MongoClient(config.MONGO_DB_AUTH_URL)):
4445
super().__init__(name=CMD_NAME)
4546
self.output_path = Path(os.path.realpath(output_folder))
47+
self.notices = None
4648
if notice_ids:
49+
self.log(LOG_WARN_TEXT.format("Notices: ") + str(notice_ids))
4750
self.notice_repository = NoticeRepository(mongodb_client=mongodb_client)
4851
self.notices = []
4952
for notice_id in notice_ids:
@@ -56,10 +59,11 @@ def __init__(self, rdf_files_folder, output_folder, pkgs_count: int, notice_ids:
5659
self.log_failed_msg(error_msg)
5760
raise FileNotFoundError(error_msg)
5861

62+
self.output_path.mkdir(parents=True, exist_ok=True)
63+
5964
def run_cmd(self):
6065
error = None
6166
try:
62-
self.output_path.mkdir(parents=True, exist_ok=True)
6367
if self.notices:
6468
self.log("Saving packages to " + str(self.output_path))
6569
for notice in self.notices:
@@ -76,23 +80,20 @@ def run_cmd(self):
7680
rdf_idx = i % rdf_files_count
7781
rdf_file_path = rdf_files[rdf_idx]
7882
notice_id = str(base_idx + i) + "_" + str(year)
79-
pkg_name = notice_id
80-
self.generate_package(notice_id, self.output_path, rdf_file_path, pkg_name)
83+
self.generate_package(notice_id, self.output_path, rdf_file_path)
8184
except Exception as e:
8285
error = e
8386

8487
return self.run_cmd_result(error)
8588

8689
@classmethod
87-
def generate_package(cls, notice_id, output_path, rdf_file_path, pkg_name):
90+
def generate_package(cls, notice_id, output_path, rdf_file_path):
8891

8992
with open(rdf_file_path, "r") as f:
9093
rdf_content = f.read()
9194

9295
encoded_rdf_content = base64.b64encode(bytes(rdf_content, 'utf-8'))
9396

94-
output_file = output_path / (pkg_name + DEFAULT_NOTICE_PACKAGE_EXTENSION)
95-
9697
notice = PackageNotice(ted_id=notice_id)
9798
notice_metadata = XMLManifestationMetadataExtractor(
9899
xml_manifestation=notice.xml_manifestation).to_metadata()
@@ -101,7 +102,7 @@ def generate_package(cls, notice_id, output_path, rdf_file_path, pkg_name):
101102
create_notice_package(
102103
notice_metadata,
103104
rdf_content=encoded_rdf_content,
104-
save_to=output_file
105+
save_to=output_path
105106
)
106107

107108

tests/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from ted_sws.core.model.manifestation import XMLManifestation, RDFManifestation
1212
from ted_sws.core.model.metadata import TEDMetadata, LanguageTaggedString, NormalisedMetadata, XMLMetadata
1313
from ted_sws.core.model.notice import Notice, NoticeStatus
14+
from ted_sws.data_manager.adapters.notice_repository import NoticeRepositoryInFileSystem
1415
from ted_sws.notice_metadata_processor.services.metadata_normalizer import TITLE_KEY, LONG_TITLE_KEY, NOTICE_TYPE_KEY, \
1516
NOTICE_NUMBER_KEY, OJS_TYPE_KEY, OJS_NUMBER_KEY, LANGUAGE_KEY, EU_INSTITUTION_KEY, SENT_DATE_KEY, DEADLINE_DATE_KEY, \
1617
BUYER_COUNTRY_KEY, BUYER_NAME_KEY, BUYER_CITY_KEY, PUBLICATION_DATE_KEY, FORM_NUMBER_KEY, \
@@ -236,3 +237,9 @@ def notice_with_rdf_manifestation():
236237
notice._rdf_manifestation = RDFManifestation(object_data=rdf_content_path.read_text(encoding="utf-8"))
237238
notice._status = NoticeStatus.TRANSFORMED
238239
return notice
240+
241+
242+
@pytest.fixture
243+
def transformed_complete_notice():
244+
test_notice_repository = NoticeRepositoryInFileSystem(repository_path=TEST_DATA_PATH / "notices")
245+
return test_notice_repository.get("396207_2018")

tests/test_data/notice_packager/template_metadata.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
}
99
},
1010
"work": {
11+
"identifier": "2016_S_001_196390",
1112
"do_not_index": "true",
1213
"date_document": "2016-08-01",
1314
"created_by_agent": "PUBL",
Lines changed: 31 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,37 +1,34 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<!DOCTYPE rdf:RDF [
3-
<!ENTITY % cellarEntities PUBLIC "-//PO-RESOURCE//ENTITIES CELLAR cdm model 1.0//EN" "/home/metaconv/metaconv_components/components/common/data/cellar_uris.ent">
4-
%cellarEntities;
5-
]>
6-
<rdf:RDF xmlns:cdm="http://publications.europa.eu/ontology/cdm#"
2+
<!DOCTYPE rdf:RDF [
3+
<!ENTITY % cellarEntities PUBLIC
4+
"-//PO-RESOURCE//ENTITIES CELLAR cdm model 1.0//EN"
5+
"/home/metaconv/metaconv_components/components/common/data/cellar_uris.ent">
6+
%cellarEntities;
7+
]>
8+
<rdf:RDF xmlns:cdm="http://publications.europa.eu/ontology/cdm#"
79
xmlns:dct="http://purl.org/dc/terms/"
810
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
9-
<cdm:work rdf:about="&resource;dataset/196390_2016">
10-
<cdm:do_not_index rdf:datatype="http://www.w3.org/2001/XMLSchema#boolean">true</cdm:do_not_index>
11-
<cdm:work_date_document rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-08-01</cdm:work_date_document>
12-
<cdm:work_created_by_agent rdf:resource="&cellar-authority;corporate-body/PUBL"/>
13-
<cdm:work_dataset_published_by_agent rdf:resource="&cellar-authority;corporate-body/PUBL"/>
14-
<cdm:datetime_transmission rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2021-08-01T00:01:00</cdm:datetime_transmission>
15-
<cdm:datetime_negotiation rdf:resource="http://publications.europa.eu/ontology/cdm#datetime_transmission"/>
16-
<cdm:work_title xml:lang="en">eProcurement notice 196390_2016</cdm:work_title>
17-
<cdm:work_date_creation rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2016-01-01</cdm:work_date_creation>
18-
<cdm:work_id>http://data.europa.eu/a4g/resource/2016/196390_2016</cdm:work_id>
19-
<cdm:work_dataset_version>20160101-0</cdm:work_dataset_version>
20-
<cdm:work_dataset_has_type_concept_type_dataset rdf:resource="http://publications.europa.eu/resource/authority/dataset-type/TEST_DATA"/>
21-
<cdm:work_dataset_keyword>eProcurement</cdm:work_dataset_keyword>
22-
<cdm:work_dataset_keyword>notice</cdm:work_dataset_keyword>
23-
<cdm:work_dataset_has_frequency_publication_frequency rdf:resource="http://publications.europa.eu/resource/authority/frequency/OTHER"/>
24-
</cdm:work>
25-
<cdm:expression rdf:about="&resource;expression/196390_2016">
26-
<cdm:expression_belongs_to_work rdf:resource="&resource;dataset/196390_2016"/>
27-
<cdm:expression_title xml:lang="en">eProcurement notice 196390_2016</cdm:expression_title>
28-
<cdm:expression_uses_language rdf:resource="&cellar-authority;language/ENG"/>
29-
</cdm:expression>
30-
<cdm:manifestation_distribution rdf:about="&resource;distribution/196390_2016/196390_2016_rdf">
31-
<cdm:manifestation_manifests_expression rdf:resource="&resource;expression/196390_2016"/>
32-
<cdm:manifestation_type rdf:datatype="http://www.w3.org/2001/XMLSchema#string">E_PROCUREMENT_ONTOLOGY</cdm:manifestation_type>
33-
<cdm:manifestation_date_publication rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2021-08-01</cdm:manifestation_date_publication>
34-
<cdm:manifestation_distribution_has_status_distribution_status rdf:resource="http://publications.europa.eu/resource/authority/dataset-status/COMPLETED"/>
35-
<cdm:manifestation_distribution_has_media_type_concept_media_type rdf:resource="http://publications.europa.eu/resource/authority/file-type/RDF"/>
36-
</cdm:manifestation_distribution>
37-
</rdf:RDF>
11+
12+
<cdm:work rdf:about="&resource;ted/2016_S_001_196390">
13+
<rdf:type rdf:resource="http://publications.europa.eu/ontology/cdm#work"/>
14+
<rdf:type rdf:resource="http://publications.europa.eu/ontology/cdm#procurement_public"/>
15+
<cdm:work_has_resource-type rdf:resource="http://publications.europa.eu/resource/authority/resource-type/PROCUREMENT_NOTICE"/>
16+
<cdm:do_not_index rdf:datatype="http://www.w3.org/2001/XMLSchema#boolean">true</cdm:do_not_index>
17+
<cdm:work_date_document rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2016-08-01</cdm:work_date_document>
18+
<cdm:work_created_by_agent rdf:resource="&cellar-authority;corporate-body/PUBL"/>
19+
<cdm:work_title xml:lang="en">eProcurement notice 196390_2016</cdm:work_title>
20+
<cdm:datetime_transmission rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2021-08-01T00:01:00</cdm:datetime_transmission>
21+
</cdm:work>
22+
23+
<cdm:expression rdf:about="&resource;ted/2016_S_001_196390.MUL">
24+
<cdm:expression_belongs_to_work rdf:resource="&resource;ted/2016_S_001_196390"/>
25+
<cdm:expression_title xml:lang="en">eProcurement notice 196390_2016</cdm:expression_title>
26+
<cdm:expression_uses_language rdf:resource="&cellar-authority;language/ENG"/>
27+
</cdm:expression>
28+
29+
<cdm:manifestation_distribution rdf:about="&resource;ted/2016_S_001_196390.MUL.rdf">
30+
<cdm:manifestation_manifests_expression rdf:resource="&resource;ted/2016_S_001_196390.MUL"/>
31+
<cdm:manifestation_type rdf:datatype="http://www.w3.org/2001/XMLSchema#string">E_PROCUREMENT_ONTOLOGY</cdm:manifestation_type>
32+
<cdm:manifestation_date_publication rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2016-08-01</cdm:manifestation_date_publication>
33+
</cdm:manifestation_distribution>
34+
</rdf:RDF>
Lines changed: 17 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,48 +1,40 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<mets xmlns="http://www.loc.gov/METS/"
3-
xmlns:xlink="http://www.w3.org/1999/xlink"
4-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5-
xsi:schemaLocation="http://www.loc.gov/METS/ cellar-mets.xsd"
6-
TYPE="create"
7-
PROFILE="http://publications.europa.eu/resource/mets/op-sip-profile_002">
2+
<mets xmlns="http://www.loc.gov/METS/"
3+
xmlns:xlink="http://www.w3.org/1999/xlink"
4+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5+
xsi:schemaLocation="http://www.loc.gov/METS/ cellar-mets.xsd"
6+
TYPE="create"
7+
PROFILE="http://publications.europa.eu/resource/mets/op-sip-profile_002">
88
<metsHdr CREATEDATE="2017-05-30T17:23:11.922+02:00">
9-
<metsDocumentID>196390_2016_mets2create</metsDocumentID>
9+
<metsDocumentID>2016_S_001_196390_create</metsDocumentID>
1010
</metsHdr>
1111
<dmdSec ID="dmdSec01">
12-
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="INSTANCE"
13-
xlink:href="196390_2016-0.mets.xml.dmd.rdf"/>
12+
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="INSTANCE" xlink:href="196390_2016-0.mets.xml.dmd.rdf"/>
1413
</dmdSec>
1514
<amdSec>
1615
<techMD ID="techMDID001">
17-
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml"
18-
OTHERMDTYPE="INSTANCE" xlink:href="techMDID001.tmd.rdf"/>
16+
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="INSTANCE" xlink:href="techMDID001.tmd.rdf"/>
1917
</techMD>
2018
</amdSec>
2119
<fileSec>
2220
<fileGrp>
23-
<file ID="file-001" MIMETYPE="application/rdf+xml"
24-
CHECKSUM="f9cdda52af5e532068547f0c91fcf186840bd088 " CHECKSUMTYPE="SHA-1">
21+
<file ID="file-001" MIMETYPE="application/rdf+xml" CHECKSUM="f9cdda52af5e532068547f0c91fcf186840bd088 " CHECKSUMTYPE="SHA-1">
2522
<FLocat LOCTYPE="URL" xlink:href="196390_2016.rdf"/>
2623
</file>
2724
</fileGrp>
2825
</fileSec>
2926
<structMap ID="structMap01">
3027
<div TYPE="work" CONTENTIDS="dataset:196390_2016" DMDID="dmdSec01" ID="w-01">
31-
<div TYPE="expression" CONTENTIDS="expression:196390_2016"
32-
DMDID="dmdSec01" ID="e-01">
33-
<div TYPE="manifestation"
34-
CONTENTIDS="distribution:196390_2016/196390_2016_rdf"
35-
DMDID="dmdSec01" ADMID="techMDID001" ID="m-001">
36-
<fptr
37-
CONTENTIDS="distribution:196390_2016/196390_2016.rdf"
38-
FILEID="file-001"/>
28+
<div TYPE="expression" CONTENTIDS="expression:196390_2016" DMDID="dmdSec01" ID="e-01">
29+
<div TYPE="manifestation" CONTENTIDS="distribution:196390_2016/196390_2016_rdf" DMDID="dmdSec01" ADMID="techMDID001" ID="m-001">
30+
<fptr CONTENTIDS="distribution:196390_2016/196390_2016.rdf" FILEID="file-001"/>
3931
</div>
4032
</div>
4133
</div>
4234
</structMap>
4335
<behaviorSec>
44-
<behavior BTYPE="sparql-load" STRUCTID="m-001">
45-
<mechanism LOCTYPE="URL" LABEL="Sparql-load" xlink:href="cellar-mets:sparql-load-behavior?model=http%3A%2F%2Fdata.europa.eu%2Fa4g%2Fresource%2F2016%2F196390_2016"/>
46-
</behavior>
47-
</behaviorSec>
36+
<behavior BTYPE="sparql-load" STRUCTID="m-001">
37+
<mechanism LOCTYPE="URL" LABEL="Sparql-load" xlink:href="cellar-mets:sparql-load-behavior?model=http%3A%2F%2Fdata.europa.eu%2Fa4g%2Fresource%2F2016%2F196390_2016"/>
38+
</behavior>
39+
</behaviorSec>
4840
</mets>
Lines changed: 17 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,48 +1,40 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<mets xmlns="http://www.loc.gov/METS/"
3-
xmlns:xlink="http://www.w3.org/1999/xlink"
4-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5-
xsi:schemaLocation="http://www.loc.gov/METS/ cellar-mets.xsd"
6-
TYPE="update"
7-
PROFILE="http://publications.europa.eu/resource/mets/op-sip-profile_002">
2+
<mets xmlns="http://www.loc.gov/METS/"
3+
xmlns:xlink="http://www.w3.org/1999/xlink"
4+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5+
xsi:schemaLocation="http://www.loc.gov/METS/ cellar-mets.xsd"
6+
TYPE="update"
7+
PROFILE="http://publications.europa.eu/resource/mets/op-sip-profile_002">
88
<metsHdr UPDATEDATE="2017-05-30T17:23:11.922+02:00">
9-
<metsDocumentID>196390_2016_mets2update</metsDocumentID>
9+
<metsDocumentID>2016_S_001_196390_update</metsDocumentID>
1010
</metsHdr>
1111
<dmdSec ID="dmdSec01">
12-
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="INSTANCE"
13-
xlink:href="196390_2016-0.mets.xml.dmd.rdf"/>
12+
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="INSTANCE" xlink:href="196390_2016-0.mets.xml.dmd.rdf"/>
1413
</dmdSec>
1514
<amdSec>
1615
<techMD ID="techMDID001">
17-
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml"
18-
OTHERMDTYPE="INSTANCE" xlink:href="techMDID001.tmd.rdf"/>
16+
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="INSTANCE" xlink:href="techMDID001.tmd.rdf"/>
1917
</techMD>
2018
</amdSec>
2119
<fileSec>
2220
<fileGrp>
23-
<file ID="file-001" MIMETYPE="application/rdf+xml"
24-
CHECKSUM="f9cdda52af5e532068547f0c91fcf186840bd088 " CHECKSUMTYPE="SHA-1">
21+
<file ID="file-001" MIMETYPE="application/rdf+xml" CHECKSUM="f9cdda52af5e532068547f0c91fcf186840bd088 " CHECKSUMTYPE="SHA-1">
2522
<FLocat LOCTYPE="URL" xlink:href="196390_2016.rdf"/>
2623
</file>
2724
</fileGrp>
2825
</fileSec>
2926
<structMap ID="structMap01">
3027
<div TYPE="work" CONTENTIDS="dataset:196390_2016" DMDID="dmdSec01" ID="w-01">
31-
<div TYPE="expression" CONTENTIDS="expression:196390_2016"
32-
DMDID="dmdSec01" ID="e-01">
33-
<div TYPE="manifestation"
34-
CONTENTIDS="distribution:196390_2016/196390_2016_rdf"
35-
DMDID="dmdSec01" ADMID="techMDID001" ID="m-001">
36-
<fptr
37-
CONTENTIDS="distribution:196390_2016/196390_2016.rdf"
38-
FILEID="file-001"/>
28+
<div TYPE="expression" CONTENTIDS="expression:196390_2016" DMDID="dmdSec01" ID="e-01">
29+
<div TYPE="manifestation" CONTENTIDS="distribution:196390_2016/196390_2016_rdf" DMDID="dmdSec01" ADMID="techMDID001" ID="m-001">
30+
<fptr CONTENTIDS="distribution:196390_2016/196390_2016.rdf" FILEID="file-001"/>
3931
</div>
4032
</div>
4133
</div>
4234
</structMap>
4335
<behaviorSec>
44-
<behavior BTYPE="sparql-load" STRUCTID="m-001">
45-
<mechanism LOCTYPE="URL" LABEL="Sparql-load" xlink:href="cellar-mets:sparql-load-behavior?model=http%3A%2F%2Fdata.europa.eu%2Fa4g%2Fresource%2F2016%2F196390_2016"/>
46-
</behavior>
47-
</behaviorSec>
36+
<behavior BTYPE="sparql-load" STRUCTID="m-001">
37+
<mechanism LOCTYPE="URL" LABEL="Sparql-load" xlink:href="cellar-mets:sparql-load-behavior?model=http%3A%2F%2Fdata.europa.eu%2Fa4g%2Fresource%2F2016%2F196390_2016"/>
38+
</behavior>
39+
</behaviorSec>
4840
</mets>

0 commit comments

Comments
 (0)