Skip to content

Commit 2a4a48c

Browse files
Merge pull request #444 from OP-TED/feature/TED-1219
refactor notice packager
2 parents f2d759e + d78a813 commit 2a4a48c

5 files changed

Lines changed: 57 additions & 298 deletions

File tree

ted_sws/notice_packager/adapters/archiver.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import abc
1313
import os
1414
from pathlib import Path
15-
from typing import List, Union
15+
from typing import List
1616
from zipfile import ZipFile, ZIP_DEFLATED
1717

1818
ARCHIVE_ZIP_FORMAT = "zip"
@@ -22,35 +22,21 @@
2222
ARCHIVE_MODE_APPEND = 'a'
2323
ARCHIVE_MODE = ARCHIVE_MODE_WRITE
2424

25-
PATH_TYPE = Union[Path, str]
26-
LIST_TYPE = List[PATH_TYPE]
27-
2825

2926
class ArchiverABC(abc.ABC):
3027
"""
3128
This abstract class provides method definitions and information for the available archivers
3229
"""
3330

3431
@abc.abstractmethod
35-
def process_archive(self, archive_name: PATH_TYPE, files: LIST_TYPE, mode: str):
32+
def process_archive(self, archive_name: Path, files: List[Path], mode: str) -> Path:
3633
"""
3734
This method adds the files (based on provided archive mode) to archive
3835
"""
3936

4037

41-
class ArchiverFactory:
42-
@classmethod
43-
def get_archiver(cls, archive_format=ARCHIVE_DEFAULT_FORMAT):
44-
"""Factory Method to return the needed Archiver, based on archive format"""
45-
archivers = {
46-
"zip": ZipArchiver
47-
}
48-
49-
return archivers[archive_format]()
50-
51-
5238
class ZipArchiver(ArchiverABC):
53-
def process_archive(self, archive_name: PATH_TYPE, files: LIST_TYPE, mode: str = ARCHIVE_MODE) -> str:
39+
def process_archive(self, archive_name: Path, files: List[Path], mode: str = ARCHIVE_MODE) -> Path:
5440
with ZipFile(archive_name, mode=mode, compression=ARCHIVE_ZIP_COMPRESSION) as archive:
5541
for file in files:
5642
if os.path.isfile(file):

ted_sws/notice_packager/adapters/template_generator.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,22 @@
1818

1919
class TemplateGenerator:
2020
@classmethod
21-
def __generate_template(cls, template, data: PackagerMetadata = None):
21+
def __generate_template(cls, template, data: PackagerMetadata = None) -> str:
2222
template_render = TEMPLATES.get_template(template).render(data.dict())
2323
return template_render
2424

2525
@classmethod
26-
def mets_xml_dmd_rdf_generator(cls, data: PackagerMetadata = None):
26+
def mets_xml_dmd_rdf_generator(cls, data: PackagerMetadata = None) -> str:
2727
template = 'mets_xml_dmd_rdf.jinja2'
2828
return cls.__generate_template(template, data)
2929

3030
@classmethod
31-
def tmd_rdf_generator(cls, data: PackagerMetadata = None):
31+
def tmd_rdf_generator(cls, data: PackagerMetadata = None) -> str:
3232
template = 'tmd_rdf.jinja2'
3333
return cls.__generate_template(template, data)
3434

3535
@classmethod
36-
def mets2action_mets_xml_generator(cls, data: PackagerMetadata = None):
36+
def mets2action_mets_xml_generator(cls, data: PackagerMetadata = None) -> str:
3737
action = data.notice.action.type
3838
validate_notice_action_type(action)
3939

ted_sws/notice_packager/services/notice_packager.py

Lines changed: 45 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,17 @@
1111

1212
import base64
1313
import binascii
14-
import os.path
14+
import pathlib
1515
from pathlib import Path
1616
from tempfile import TemporaryDirectory
17-
from typing import Union
17+
from typing import List
1818

1919
from ted_sws.core.model.manifestation import METSManifestation
2020
from ted_sws.core.model.notice import Notice
21-
from ted_sws.data_manager.adapters.notice_repository import NoticeRepositoryABC
2221
from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata
2322
from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import \
2423
XMLManifestationMetadataExtractor
25-
from ted_sws.notice_packager.adapters.archiver import ArchiverFactory, ARCHIVE_ZIP_FORMAT, PATH_TYPE, \
26-
LIST_TYPE as PATH_LIST_TYPE
24+
from ted_sws.notice_packager.adapters.archiver import ZipArchiver
2725
from ted_sws.notice_packager.adapters.template_generator import TemplateGenerator
2826
from ted_sws.notice_packager.model.metadata import ACTION_CREATE
2927
from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer
@@ -35,182 +33,91 @@
3533
FILE_TMD_FORMAT = "techMDID001.tmd.rdf"
3634
FILE_RDF_FORMAT = "{notice_id}.ttl"
3735

38-
NOTICE_METADATA_TYPE = ExtractedMetadata
39-
IN_DATA_TYPE = Union[NOTICE_METADATA_TYPE, Notice, str]
40-
RDF_CONTENT_TYPE = Union[str, bytes]
41-
42-
43-
def create_notice_package(in_data: IN_DATA_TYPE, rdf_content: RDF_CONTENT_TYPE = None,
44-
extra_files: PATH_LIST_TYPE = None, action: str = ACTION_CREATE,
45-
save_to: PATH_TYPE = None, notice_repository: NoticeRepositoryABC = None) -> str:
46-
"""
47-
:param in_data: can be Notice object, ExtractedMetadata object or notice_id string
48-
:param rdf_content: base64 encoded bytes content of rdf file
49-
:param extra_files: additional files paths to be added to archive
50-
:param action:
51-
:param save_to: can be:
52-
None - base64 encoded string of archive content,
53-
"" - temporary archive path,
54-
string (path to archive: containing archive name or just the folder) - archive path
55-
:param notice_repository:
56-
:return: base64 encoded archive or path to archive
57-
"""
58-
59-
tmp_dir = TemporaryDirectory()
60-
tmp_dir_path = Path(tmp_dir.name)
61-
62-
notice_packager = NoticePackager(in_data, action, tmp_dir_path, notice_repository)
63-
notice_packager.add_template_files()
64-
notice_packager.add_rdf_content(rdf_content)
65-
notice_packager.add_extra_files(extra_files)
66-
67-
return notice_packager.pack(save_to)
68-
6936

7037
def package_notice(notice: Notice) -> Notice:
7138
"""
7239
This function generates the METS package and sets the Notice's METSManifestation.
7340
"""
74-
mets_manifestation_content = create_notice_package(in_data=notice,
75-
rdf_content=notice.distilled_rdf_manifestation.object_data.encode(
76-
"utf-8"))
41+
42+
notice_packager = NoticePackager(notice, ACTION_CREATE)
43+
notice_packager.add_template_files()
44+
notice_packager.add_rdf_content(notice.distilled_rdf_manifestation.object_data.encode("utf-8"))
45+
mets_manifestation_content = notice_packager.pack()
7746
notice.set_mets_manifestation(mets_manifestation=METSManifestation(object_data=mets_manifestation_content))
7847
return notice
7948

8049

81-
def package_notice_and_save_to(notice: Notice, save_to: PATH_TYPE = None) -> str:
82-
"""
83-
This function package a Notice to save_to location.
84-
"""
85-
return create_notice_package(in_data=notice,
86-
rdf_content=notice.distilled_rdf_manifestation.object_data.encode(
87-
"utf-8"),
88-
save_to=save_to)
89-
90-
9150
class NoticePackager:
9251
"""
9352
This class will manage the steps/methods of notice packager creation
9453
"""
9554

96-
def __init__(self, in_data: IN_DATA_TYPE, action: str, tmp_dir_path: Path, notice_repository: NoticeRepositoryABC):
97-
self.notice_metadata: NOTICE_METADATA_TYPE = self.__validated_in_data(in_data, notice_repository)
98-
self.archiver = ArchiverFactory.get_archiver(ARCHIVE_ZIP_FORMAT)
99-
metadata_transformer = MetadataTransformer(self.notice_metadata)
55+
def __init__(self, notice: Notice, action: str):
56+
self.tmp_dir = TemporaryDirectory()
57+
self.tmp_dir_path = Path(self.tmp_dir.name)
58+
notice_metadata: ExtractedMetadata = XMLManifestationMetadataExtractor(
59+
xml_manifestation=notice.xml_manifestation).to_metadata()
60+
metadata_transformer = MetadataTransformer(notice_metadata)
10061
self.template_metadata = metadata_transformer.template_metadata(action=action)
101-
10262
self.notice_id = self.template_metadata.notice.id
10363
self.notice_action = self.template_metadata.notice.action.type
104-
105-
self.tmp_dir_path = tmp_dir_path
106-
self.files: PATH_LIST_TYPE = []
107-
108-
def __write_template_to_file(self, file_path, template_generator, template_metadata):
109-
self.__write_to_file(file_path, template_generator(template_metadata))
110-
111-
@classmethod
112-
def __write_to_file(cls, file_path, data, mode: str = 'x'):
113-
with open(file_path, mode) as file:
114-
file.write(data)
115-
file.close()
116-
117-
@classmethod
118-
def __validated_in_data(cls, in_data: IN_DATA_TYPE, notice_repository: NoticeRepositoryABC) -> NOTICE_METADATA_TYPE:
119-
accepted_types = IN_DATA_TYPE.__args__
120-
if not isinstance(in_data, accepted_types):
121-
raise TypeError('Notice Packager accepts input data of "%s" types only' % accepted_types)
122-
123-
# here, needed notice_metadata is extracted from provided in_data
124-
notice_metadata = None
125-
if isinstance(in_data, str): # notice_id
126-
'''
127-
if we get notice_id as in_data,
128-
a Notice must be assigned to in_data for next step of the validation flow
129-
'''
130-
# get Notice from DB
131-
notice_id = in_data
132-
if isinstance(notice_repository, NoticeRepositoryABC):
133-
in_data = notice_repository.get(reference=notice_id)
134-
else:
135-
raise TypeError('Notice Repository must be sent, if providing notice_id "%s"' % notice_id)
136-
137-
if isinstance(in_data, Notice): # Notice
138-
'''
139-
if we get Notice object as in_data,
140-
notice_metadata should be extracted from it
141-
'''
142-
notice = in_data
143-
notice_metadata = XMLManifestationMetadataExtractor(
144-
xml_manifestation=notice.xml_manifestation).to_metadata()
145-
elif isinstance(in_data, NOTICE_METADATA_TYPE): # ExtractedMetadata
146-
notice_metadata = in_data
147-
148-
if not isinstance(notice_metadata, NOTICE_METADATA_TYPE):
149-
raise TypeError('Notice Metadata must be of "%s" type' % NOTICE_METADATA_TYPE.__name__)
150-
151-
return notice_metadata
64+
self.files: List[pathlib.Path] = []
15265

15366
def add_template_files(self):
15467
file_mets_xml_dmd_rdf = self.tmp_dir_path / FILE_METS_XML_FORMAT.format(notice_id=self.notice_id)
155-
self.__write_template_to_file(file_mets_xml_dmd_rdf, TemplateGenerator.mets_xml_dmd_rdf_generator,
156-
self.template_metadata)
157-
15868
file_tmd_rdf = self.tmp_dir_path / FILE_TMD_FORMAT.format()
159-
self.__write_template_to_file(file_tmd_rdf, TemplateGenerator.tmd_rdf_generator, self.template_metadata)
160-
16169
file_mets2action_mets_xml = self.tmp_dir_path / FILE_METS_ACTION_FORMAT.format(
16270
work_identifier=self.template_metadata.work.identifier,
16371
action=self.notice_action
16472
)
165-
self.__write_template_to_file(file_mets2action_mets_xml, TemplateGenerator.mets2action_mets_xml_generator,
166-
self.template_metadata)
167-
73+
encoding_type = "utf-8"
74+
file_mets_xml_dmd_rdf.write_text(TemplateGenerator.mets_xml_dmd_rdf_generator(self.template_metadata),
75+
encoding=encoding_type
76+
)
77+
file_tmd_rdf.write_text(TemplateGenerator.tmd_rdf_generator(self.template_metadata), encoding=encoding_type)
78+
file_mets2action_mets_xml.write_text(TemplateGenerator.mets2action_mets_xml_generator(self.template_metadata),
79+
encoding=encoding_type)
16880
self.files = [
16981
file_mets_xml_dmd_rdf,
17082
file_tmd_rdf,
17183
file_mets2action_mets_xml
17284
]
17385

174-
def add_rdf_content(self, rdf_content: RDF_CONTENT_TYPE):
86+
def add_rdf_content(self, rdf_content: bytes):
87+
"""
88+
89+
:param rdf_content:
90+
:return:
91+
"""
17592
if rdf_content is not None:
176-
if isinstance(rdf_content, str):
177-
rdf_content = bytes(rdf_content, 'utf-8')
17893
try:
17994
rdf_content_bytes = base64.b64decode(rdf_content, validate=True)
18095
except binascii.Error:
18196
rdf_content_bytes = rdf_content
18297
rdf_file_path = self.tmp_dir_path / FILE_RDF_FORMAT.format(notice_id=self.notice_id)
183-
self.__write_to_file(rdf_file_path, rdf_content_bytes, 'xb')
98+
rdf_file_path.write_bytes(rdf_content_bytes)
18499
self.files.append(rdf_file_path)
185100

186-
def add_extra_files(self, extra_files: PATH_LIST_TYPE):
101+
def add_extra_files(self, extra_files: List[pathlib.Path]):
102+
"""
103+
104+
:param extra_files:
105+
:return:
106+
"""
187107
if extra_files is not None:
188108
self.files += extra_files
189109

190-
def pack(self, save_to: PATH_TYPE) -> str:
110+
def get_archive_name(self) -> str:
191111
archive_name = ARCHIVE_NAME_FORMAT.format(
192112
work_identifier=self.template_metadata.work.identifier,
193113
action=self.template_metadata.notice.action.type
194114
)
195-
196-
archive_path = self.tmp_dir_path / archive_name
197-
package_path = self.archiver.process_archive(archive_path, self.files)
198-
199-
with open(package_path, "rb") as f:
200-
raw_archive_content = f.read()
201-
202-
if save_to is None:
203-
# If no save_to path is specified, then just return the package content as string
204-
archive_content = base64.b64encode(raw_archive_content)
205-
return str(archive_content, 'utf-8')
206-
else:
207-
if save_to:
208-
# If save_to path is not empty, then create, write the package and return the destination path
209-
save_to_path = Path(save_to)
210-
if os.path.isdir(save_to_path):
211-
save_to_path /= archive_name
212-
self.__write_to_file(save_to_path, raw_archive_content, 'wb')
213-
return str(save_to_path)
214-
else:
215-
# If save_to is empty (save_to="") then return the created package source path
216-
return package_path
115+
return archive_name
116+
117+
def pack(self) -> str:
118+
archiver = ZipArchiver()
119+
archive_path = self.tmp_dir_path / self.get_archive_name()
120+
package_path = archiver.process_archive(archive_path, self.files)
121+
raw_archive_content = package_path.read_bytes()
122+
archive_content = base64.b64encode(raw_archive_content)
123+
return str(archive_content, 'utf-8')

tests/features/notice_packager/test_notice_packager.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,8 @@
77
when,
88
)
99

10-
from ted_sws.core.model.manifestation import METSManifestation
1110
from ted_sws.core.model.notice import Notice, NoticeStatus
12-
from ted_sws.notice_packager.services.notice_packager import create_notice_package
11+
from ted_sws.notice_packager.services.notice_packager import package_notice
1312

1413

1514
@scenario('test_notice_packager.feature', 'Package a TED notice in a METS package')
@@ -33,11 +32,8 @@ def the_notice_status_is_eligible_for_packaging(package_eligible_notice):
3332
@when('the notice packaging is executed', target_fixture="packaged_notice")
3433
def the_notice_packaging_is_executed(package_eligible_notice):
3534
"""the notice packaging is executed."""
36-
rdf_content = package_eligible_notice.distilled_rdf_manifestation.object_data.encode("utf-8")
37-
mets_manifestation_content = create_notice_package(in_data=package_eligible_notice, rdf_content=rdf_content)
38-
package_eligible_notice.set_mets_manifestation(
39-
mets_manifestation=METSManifestation(object_data=mets_manifestation_content))
40-
return package_eligible_notice
35+
packaged_notice = package_notice(notice=package_eligible_notice)
36+
return packaged_notice
4137

4238

4339
@then('the notice have METS manifestation')

0 commit comments

Comments
 (0)