Skip to content

Commit 4b345f9

Browse files
authored
Merge pull request #447 from OP-TED/feature/TED-1219-packages
Feature/ted 1219 packages
2 parents 61ceca6 + 52e2a55 commit 4b345f9

22 files changed

Lines changed: 605 additions & 3112 deletions

ted_sws/notice_packager/adapters/template_generator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from jinja2 import Environment, PackageLoader
1313

14-
from ted_sws.notice_packager.model.metadata import PackagerMetadata, validate_notice_action_type
14+
from ted_sws.notice_packager.model.metadata import PackagerMetadata, validate_mets_type
1515

1616
TEMPLATES = Environment(loader=PackageLoader("ted_sws.notice_packager.resources", "templates"))
1717

@@ -34,8 +34,8 @@ def tmd_rdf_generator(cls, data: PackagerMetadata = None) -> str:
3434

3535
@classmethod
def mets2action_mets_xml_generator(cls, data: PackagerMetadata = None) -> str:
    """
    Render the ``mets2action_mets_xml.jinja2`` template for the given metadata.

    The METS type stored in ``data.mets.type`` is checked with
    ``validate_mets_type`` before the template is rendered.

    :param data: packager metadata handed to the template
    :return: the result of ``cls.__generate_template`` for this template
    :raises ValueError: if ``data.mets.type`` is not an accepted METS type
    """
    # NOTE(review): ``data`` defaults to None but is dereferenced right away,
    # so calling this without metadata fails with AttributeError -- confirm
    # whether any caller relies on the default.
    action = data.mets.type
    validate_mets_type(action)

    template = 'mets2action_mets_xml.jinja2'
    return cls.__generate_template(template, data)

ted_sws/notice_packager/model/metadata.py

Lines changed: 55 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,25 @@
1616

1717
from ted_sws.core.model.metadata import Metadata
1818

19-
WORK_AGENT = "PUBL"
19+
METS_PROFILE = "http://publications.europa.eu/resource/mets/op-sip-profile_002"
20+
METS_TYPE_CREATE = "create"
21+
METS_TYPE_UPDATE = "update"
22+
METS_TYPE_DELETE = "delete"
23+
METS_ACCEPTED_TYPES = [METS_TYPE_CREATE, METS_TYPE_UPDATE, METS_TYPE_DELETE]
24+
METS_DMD_MDTYPE = "OTHER"
25+
METS_DMD_OTHERMDTYPE = "INSTANCE"
26+
METS_DMD_HREF = "{work_identifier}_{revision}.mets.xml.dmd.rdf"
27+
METS_DMD_ID = "dmd_{work_identifier}_{revision}_{dmd_idx}"
28+
METS_TMD_ID = "tmd_{work_identifier}_{revision}_{tmd_idx}"
29+
METS_TMD_HREF = "{work_identifier}_{revision}.tmd.rdf"
30+
METS_TMD_MDTYPE = "OTHER"
31+
METS_TMD_OTHERMDTYPE = "INSTANCE"
32+
METS_FILE_ID = "file_{work_identifier}_{revision}_{file_idx}"
33+
METS_NOTICE_FILE_HREF = "{work_identifier}_{revision}.notice.rdf"
34+
METS_NOTICE_FILE_MIMETYPE = "application/rdf+xml"
35+
METS_NOTICE_FILE_CHECKSUM_TYPE = "SHA-256"
36+
37+
WORK_AGENT = "EURUN"
2038
PUBLICATION_FREQUENCY = "OTHER"
2139
CONCEPT_TYPE_DATASET = "TEST_DATA"
2240
DATASET_KEYWORD = [
@@ -35,38 +53,52 @@
3553
LANGUAGE = LANGUAGES[0]
3654
USES_LANGUAGE = "MUL"
3755

38-
ACTION_CREATE = "create"
39-
ACTION_UPDATE = "update"
40-
ACCEPTED_ACTIONS = [ACTION_CREATE, ACTION_UPDATE]
41-
4256
REVISION = "0"
4357

4458

45-
def validate_notice_action_type(v):
46-
if v not in ACCEPTED_ACTIONS:
47-
raise ValueError('No such action: %s' % v)
59+
def validate_mets_type(mets_type):
    """
    Check that the given value is one of the accepted METS types.

    :param mets_type: value to look up in METS_ACCEPTED_TYPES
    :return: None
    :raises ValueError: if the value is not listed in METS_ACCEPTED_TYPES
    """
    if mets_type not in METS_ACCEPTED_TYPES:
        # Format through a 1-tuple: with a bare operand, a tuple value would be
        # unpacked as the %-arguments and raise TypeError instead of ValueError.
        raise ValueError('No such METS type: %s' % (mets_type,))
4862

4963

50-
class NoticeActionMetadata(Metadata):
64+
class NoticeMetadata(Metadata):
    """
    General notice metadata
    """
    # Normalized notice publication number (set by the metadata transformer).
    id: Optional[str]
    # Notice number part derived from ``id`` via publication_notice_number().
    public_number_document: Optional[str]
    # Publication year followed by the OJS issue number zero-padded to 3 chars.
    public_number_edition: Optional[str]
6171

6272

63-
class NoticeMetadata(Metadata):
73+
class MetsMetadata(Metadata):
    """
    METS package metadata (values rendered into the METS XML template)
    """
    languages: List[str] = LANGUAGES
    revision: str = REVISION

    # METS TYPE attribute; must be one of METS_ACCEPTED_TYPES (see validator).
    type: str = METS_TYPE_CREATE
    profile: str = METS_PROFILE
    # NOTE(review): this default is computed once, when the class body is
    # executed, so it is not refreshed per instance -- confirm this is intended.
    createdate: str = datetime.datetime.now().isoformat()
    # Set by MetadataTransformer.template_metadata as "<work identifier>_<action>".
    document_id: Optional[str]
    # The *_id / *_href values below are built by the metadata transformer from
    # the METS_* format-string constants (work identifier + revision + index).
    dmd_id: Optional[str]
    dmd_mdtype: str = METS_DMD_MDTYPE
    dmd_othermdtype: str = METS_DMD_OTHERMDTYPE
    dmd_href: Optional[str]
    tmd_id: Optional[str]
    tmd_href: Optional[str]
    tmd_mdtype: str = METS_TMD_MDTYPE
    tmd_othermdtype: str = METS_TMD_OTHERMDTYPE
    file_id: Optional[str]
    notice_file_href: Optional[str]
    notice_file_mimetype: Optional[str] = METS_NOTICE_FILE_MIMETYPE
    notice_file_checksum: Optional[str]
    notice_file_checksum_type: Optional[str] = METS_NOTICE_FILE_CHECKSUM_TYPE

    # The method name is carried over from the former notice-action model; it
    # validates the METS ``type`` field.
    @validator('type')
    def validate_notice_action_type(cls, action_type):
        validate_mets_type(action_type)
        return action_type
70102

71103

72104
class WorkMetadata(Metadata):
@@ -95,11 +127,13 @@ class WorkMetadata(Metadata):
95127

96128

97129
class ExpressionMetadata(Metadata):
    # Expression-level metadata used by the packaging templates.

    # Set by the metadata transformer to "<work identifier>.MUL".
    identifier: Optional[str]
    # Mapping of language code -> expression title.
    title: Optional[Dict[str, str]] = None
    # Language authority code rendered into expression_uses_language.
    uses_language: str = USES_LANGUAGE
100133

101134

102135
class ManifestationMetadata(Metadata):
136+
identifier: Optional[str]
103137
type: str = MANIFESTATION_TYPE
104138
date_publication: str = datetime.datetime.now().strftime('%Y-%m-%d')
105139
distribution_has_status_distribution_status: str = DISTRIBUTION_STATUS
@@ -108,6 +142,7 @@ class ManifestationMetadata(Metadata):
108142

109143
class PackagerMetadata(Metadata):
110144
notice: NoticeMetadata = NoticeMetadata()
145+
mets: MetsMetadata = MetsMetadata()
111146
work: WorkMetadata = WorkMetadata()
112147
expression: ExpressionMetadata = ExpressionMetadata()
113148
manifestation: ManifestationMetadata = ManifestationMetadata()

ted_sws/notice_packager/resources/templates/mets2action_mets_xml.jinja2

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,38 +2,38 @@
22
<mets xmlns="http://www.loc.gov/METS/"
33
xmlns:xlink="http://www.w3.org/1999/xlink"
44
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5-
xsi:schemaLocation="http://www.loc.gov/METS/ cellar-mets.xsd"
6-
TYPE="{{ notice.action.type }}"
7-
PROFILE="http://publications.europa.eu/resource/mets/op-sip-profile_002">
8-
<metsHdr {{ notice.action.type|upper }}DATE="{{ notice.action.date }}">
9-
<metsDocumentID>{{ work.identifier }}_{{ notice.action.type }}</metsDocumentID>
5+
{# xsi:schemaLocation is a whitespace-separated "namespace-URI schema-location" pair; keep the space. #}
xsi:schemaLocation="http://www.loc.gov/METS/ cellar-mets.xsd"
6+
TYPE="{{ mets.type }}"
7+
PROFILE="{{ mets.profile }}">
8+
<metsHdr CREATEDATE="{{ mets.createdate }}">
9+
<metsDocumentID>{{ mets.document_id }}</metsDocumentID>
1010
</metsHdr>
11-
<dmdSec ID="dmdSec01">
12-
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="INSTANCE" xlink:href="{{ notice.id }}-0.mets.xml.dmd.rdf"/>
11+
<dmdSec ID="{{ mets.dmd_id }}">
12+
<mdRef MDTYPE="{{ mets.dmd_mdtype }}" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="{{ mets.dmd_othermdtype }}" xlink:href="{{ mets.dmd_href }}"/>
1313
</dmdSec>
1414
<amdSec>
15-
<techMD ID="techMDID001">
16-
<mdRef MDTYPE="OTHER" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="INSTANCE" xlink:href="techMDID001.tmd.rdf"/>
15+
<techMD ID="{{ mets.tmd_id }}">
16+
<mdRef MDTYPE="{{ mets.tmd_mdtype }}" LOCTYPE="URL" MIMETYPE="application/rdf+xml" OTHERMDTYPE="{{ mets.tmd_othermdtype }}" xlink:href="{{ mets.tmd_href }}"/>
1717
</techMD>
1818
</amdSec>
1919
<fileSec>
2020
<fileGrp>
21-
<file ID="file-001" MIMETYPE="application/rdf+xml" CHECKSUM="f9cdda52af5e532068547f0c91fcf186840bd088 " CHECKSUMTYPE="SHA-1">
22-
<FLocat LOCTYPE="URL" xlink:href="{{ notice.id }}.rdf"/>
21+
<file ID="{{ mets.file_id }}" MIMETYPE="{{ mets.notice_file_mimetype }}" CHECKSUM="{{ mets.notice_file_checksum }}" CHECKSUMTYPE="{{ mets.notice_file_checksum_type }}">
22+
<FLocat LOCTYPE="URL" xlink:href="{{ mets.notice_file_href }}"/>
2323
</file>
2424
</fileGrp>
2525
</fileSec>
26-
<structMap ID="structMap01">
27-
<div TYPE="work" CONTENTIDS="dataset:{{ notice.id }}" DMDID="dmdSec01" ID="w-01">
28-
<div TYPE="expression" CONTENTIDS="expression:{{ notice.id }}" DMDID="dmdSec01" ID="e-01">
29-
<div TYPE="manifestation" CONTENTIDS="distribution:{{ notice.id }}/{{ notice.id }}_rdf" DMDID="dmdSec01" ADMID="techMDID001" ID="m-001">
30-
<fptr CONTENTIDS="distribution:{{ notice.id }}/{{ notice.id }}.rdf" FILEID="file-001"/>
26+
<structMap ID="struct_map_{{ work.identifier }}_001">
27+
<div TYPE="work" CONTENTIDS="ted:{{ work.identifier }}" DMDID="{{ mets.dmd_id }}" ID="w_{{ work.identifier }}_001">
28+
<div TYPE="expression" CONTENTIDS="ted:{{ expression.identifier }}" DMDID="{{ mets.dmd_id }}" ID="e_{{ work.identifier }}_001">
29+
<div TYPE="manifestation" CONTENTIDS="ted:{{ manifestation.identifier }}" DMDID="{{ mets.dmd_id }}" ADMID="{{ mets.tmd_id }}" ID="m_{{ work.identifier }}_001">
30+
<fptr CONTENTIDS="distribution:{{ notice.id }}/{{ mets.notice_file_href }}" FILEID="{{ mets.file_id }}"/>
3131
</div>
3232
</div>
3333
</div>
3434
</structMap>
3535
<behaviorSec>
36-
<behavior BTYPE="sparql-load" STRUCTID="m-001">
36+
<behavior BTYPE="sparql-load" STRUCTID="m_{{ work.identifier }}_001">
3737
<mechanism LOCTYPE="URL" LABEL="Sparql-load" xlink:href="cellar-mets:sparql-load-behavior?model={{ work.uri | urlencode | replace("/", "%2F") }}"/>
3838
</behavior>
3939
</behaviorSec>

ted_sws/notice_packager/resources/templates/mets_xml_dmd_rdf.jinja2

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,17 @@
1111

1212
<cdm:work rdf:about="&resource;ted/{{ work.identifier }}">
1313
<rdf:type rdf:resource="http://publications.europa.eu/ontology/cdm#work"/>
14-
<rdf:type rdf:resource="http://publications.europa.eu/ontology/cdm#procurement_public"/>
14+
{# <rdf:type rdf:resource="http://publications.europa.eu/ontology/cdm#procurement_public"/> #}
15+
<cdm:work_id_document rdf:datatype="http://www.w3.org/2001/XMLSchema#string">ted:{{ work.identifier }}</cdm:work_id_document>
1516
<cdm:work_has_resource-type rdf:resource="http://publications.europa.eu/resource/authority/resource-type/PROCUREMENT_NOTICE"/>
1617
<cdm:do_not_index rdf:datatype="http://www.w3.org/2001/XMLSchema#boolean">{{ work.do_not_index }}</cdm:do_not_index>
1718
<cdm:work_date_document rdf:datatype="http://www.w3.org/2001/XMLSchema#date">{{ work.date_document }}</cdm:work_date_document>
1819
<cdm:work_created_by_agent rdf:resource="&cellar-authority;corporate-body/{{ work.created_by_agent }}"/>
19-
{% for lang in notice.languages %}
20+
<cdm:procurement_public_number_edition rdf:datatype="http://www.w3.org/2001/XMLSchema#positiveInteger">{{ notice.public_number_edition }}</cdm:procurement_public_number_edition>
21+
{% for lang in mets.languages %}
2022
<cdm:work_title xml:lang="{{ lang }}">{{ work.title[lang] }}</cdm:work_title>
2123
{% endfor %}
24+
<cdm:procurement_public_number_document_in_official-journal rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ notice.public_number_document }}</cdm:procurement_public_number_document_in_official-journal>
2225
<cdm:datetime_transmission rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">{{ work.datetime_transmission }}</cdm:datetime_transmission>
2326
{# <cdm:procurement_public_issued_by_country>{{ work.procurement_public_issued_by_country }}</cdm:procurement_public_issued_by_country>
2427
{% for uri in work.procurement_public_url_etendering %}
@@ -36,16 +39,16 @@
3639
<cdm:work_dataset_has_frequency_publication_frequency rdf:resource="http://publications.europa.eu/resource/authority/frequency/{{ work.dataset_has_frequency_publication_frequency }}"/> #}
3740
</cdm:work>
3841

39-
<cdm:expression rdf:about="&resource;ted/{{ work.identifier }}.MUL">
42+
<cdm:expression rdf:about="&resource;ted/{{ expression.identifier }}">
4043
<cdm:expression_belongs_to_work rdf:resource="&resource;ted/{{ work.identifier }}"/>
41-
{% for lang in notice.languages %}
44+
{% for lang in mets.languages %}
4245
<cdm:expression_title xml:lang="{{ lang }}">{{ expression.title[lang] }}</cdm:expression_title>
4346
{% endfor %}
4447
<cdm:expression_uses_language rdf:resource="&cellar-authority;language/{{ expression.uses_language }}"/>
4548
</cdm:expression>
4649

47-
<cdm:manifestation_distribution rdf:about="&resource;ted/{{ work.identifier }}.MUL.rdf">
48-
<cdm:manifestation_manifests_expression rdf:resource="&resource;ted/{{ work.identifier }}.MUL"/>
50+
<cdm:manifestation_distribution rdf:about="&resource;ted/{{ manifestation.identifier }}">
51+
<cdm:manifestation_manifests_expression rdf:resource="&resource;ted/{{ expression.identifier }}"/>
4952
<cdm:manifestation_type rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ manifestation.type }}</cdm:manifestation_type>
5053
<cdm:manifestation_date_publication rdf:datatype="http://www.w3.org/2001/XMLSchema#date">{{ manifestation.date_publication }}</cdm:manifestation_date_publication>
5154
{# <cdm:manifestation_distribution_has_status_distribution_status rdf:resource="http://publications.europa.eu/resource/authority/dataset-status/{{ manifestation.distribution_has_status_distribution_status }}"/>

ted_sws/notice_packager/resources/templates/tmd_rdf.jinja2

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
%cellarEntities;
66
]>
77
<rdf:RDF xmlns:tdm="http://publications.europa.eu/ontology/tdm#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
8-
<tdm:stream rdf:about="&resource;distribution/{{ notice.id }}/{{ notice.id }}.rdf">
9-
<tdm:stream_name rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ notice.id }}.rdf</tdm:stream_name>
8+
<tdm:stream rdf:about="&resource;distribution/{{ notice.id }}/{{ mets.notice_file_href }}">
9+
<tdm:stream_name rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ mets.notice_file_href }}</tdm:stream_name>
1010
<tdm:stream_format>
1111
<tdm:format>
1212
<tdm:format_designated_by_format_designation>
@@ -16,7 +16,7 @@
1616
</tdm:format_designated_by_format_designation>
1717
</tdm:format>
1818
</tdm:stream_format>
19-
<tdm:stream_label rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ work.title[notice.languages[0]] }}</tdm:stream_label>
19+
<tdm:stream_label rdf:datatype="http://www.w3.org/2001/XMLSchema#string">{{ work.title[mets.languages[0]] }}</tdm:stream_label>
2020
<tdm:stream_order rdf:datatype="http://www.w3.org/2001/XMLSchema#positiveInteger">1</tdm:stream_order>
2121
</tdm:stream>
2222
</rdf:RDF>

ted_sws/notice_packager/services/metadata_transformer.py

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
import datetime
1515

1616
from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata
17-
from ted_sws.notice_packager.model.metadata import PackagerMetadata, ACTION_CREATE, LANGUAGE, REVISION, BASE_WORK, \
18-
BASE_TITLE
17+
from ted_sws.notice_packager.model.metadata import PackagerMetadata, METS_TYPE_CREATE, LANGUAGE, REVISION, BASE_WORK, \
18+
BASE_TITLE, METS_DMD_HREF, METS_DMD_ID, METS_TMD_ID, METS_TMD_HREF, METS_FILE_ID, METS_NOTICE_FILE_HREF
1919

2020
# This is used in pipeline
2121
NORMALIZED_SEPARATOR = '_'
@@ -31,9 +31,12 @@ class MetadataTransformer:
3131
def __init__(self, notice_metadata: ExtractedMetadata):
3232
self.notice_metadata = notice_metadata
3333

34-
def template_metadata(self, action: str = METS_TYPE_CREATE) -> PackagerMetadata:
    """
    Build the packager metadata for this notice and stamp it with the METS action.

    :param action: METS type to store on the result (defaults to METS_TYPE_CREATE)
    :return: metadata produced by ``from_notice_metadata`` with ``mets.type``
             and ``mets.document_id`` filled in
    """
    metadata = self.from_notice_metadata(self.notice_metadata)

    # Set the custom and composed metadata properties.
    # NOTE(review): ``action`` is assigned without an explicit check here;
    # whether assignment triggers model validation depends on the Metadata
    # base class configuration -- confirm.
    metadata.mets.type = action
    metadata.mets.document_id = f"{metadata.work.identifier}_{action}"
    return metadata
3841

3942
@classmethod
@@ -46,16 +49,6 @@ def normalize_value(cls, value: str) -> str:
4649
"""
4750
return value.replace(DENORMALIZED_SEPARATOR, NORMALIZED_SEPARATOR)
4851

49-
@classmethod
50-
def denormalize_value(cls, value: str) -> str:
51-
"""
52-
The pipeline's separator is replaced with initial (TED API)'s one.
53-
This is used when notice goes out to API
54-
:param value:
55-
:return:
56-
"""
57-
return value.replace(NORMALIZED_SEPARATOR, DENORMALIZED_SEPARATOR)
58-
5952
@classmethod
6053
def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMetadata:
6154
_date = datetime.datetime.now()
@@ -65,6 +58,9 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMet
6558

6659
# NOTICE
6760
metadata.notice.id = cls.normalize_value(notice_metadata.notice_publication_number)
61+
metadata.notice.public_number_document = publication_notice_number(metadata.notice.id)
62+
metadata.notice.public_number_edition = publication_notice_year(
63+
notice_metadata) + notice_metadata.ojs_issue_number.zfill(3)
6864

6965
# WORK
7066
publication_date = datetime.datetime.strptime(notice_metadata.publication_date, '%Y%m%d').strftime('%Y-%m-%d')
@@ -81,10 +77,42 @@ def from_notice_metadata(cls, notice_metadata: ExtractedMetadata) -> PackagerMet
8177
metadata.work.procurement_public_url_etendering = notice_metadata.uri_list
8278

8379
# EXPRESSION
84-
metadata.expression.title = {LANGUAGE: BASE_TITLE + " " + metadata.notice.id}
80+
metadata.expression.identifier = f"{metadata.work.identifier}.MUL"
81+
metadata.expression.title = {LANGUAGE: BASE_TITLE + " " + metadata.work.identifier}
8582

8683
# MANIFESTATION
84+
metadata.manifestation.identifier = f"{metadata.expression.identifier}.rdf"
8785
metadata.manifestation.date_publication = publication_date
86+
87+
# METS
88+
metadata.mets.dmd_href = METS_DMD_HREF.format(
89+
work_identifier=metadata.work.identifier,
90+
revision=metadata.mets.revision
91+
)
92+
metadata.mets.dmd_id = METS_DMD_ID.format(
93+
work_identifier=metadata.work.identifier,
94+
revision=metadata.mets.revision,
95+
dmd_idx="001"
96+
)
97+
metadata.mets.tmd_id = METS_TMD_ID.format(
98+
work_identifier=metadata.work.identifier,
99+
revision=metadata.mets.revision,
100+
tmd_idx="001"
101+
)
102+
metadata.mets.tmd_href = METS_TMD_HREF.format(
103+
work_identifier=metadata.work.identifier,
104+
revision=metadata.mets.revision
105+
)
106+
metadata.mets.file_id = METS_FILE_ID.format(
107+
work_identifier=metadata.work.identifier,
108+
revision=metadata.mets.revision,
109+
file_idx="001"
110+
)
111+
metadata.mets.notice_file_href = METS_NOTICE_FILE_HREF.format(
112+
work_identifier=metadata.work.identifier,
113+
revision=metadata.mets.revision
114+
)
115+
88116
return metadata
89117

90118

@@ -103,4 +131,4 @@ def publication_notice_uri(notice_id, notice_metadata):
103131
def publication_work_identifier(notice_id, notice_metadata):
    """
    Compose the work identifier ``<year>_<ojs_type>_<issue>_<number>``.

    :param notice_id: notice id passed to ``publication_notice_number``
    :param notice_metadata: metadata providing ``ojs_type`` and
           ``ojs_issue_number`` and passed to ``publication_notice_year``
    :return: the identifier string, with the issue number zero-padded to 3 characters
    """
    year = publication_notice_year(notice_metadata)
    number = publication_notice_number(notice_id)
    # zfill keeps the issue segment fixed-width; ojs_issue_number must be a str.
    issue = notice_metadata.ojs_issue_number.zfill(3)
    return f"{year}_{notice_metadata.ojs_type}_{issue}_{number}"

0 commit comments

Comments
 (0)