|
11 | 11 |
|
12 | 12 | import base64 |
13 | 13 | import binascii |
14 | | -import os.path |
| 14 | +import pathlib |
15 | 15 | from pathlib import Path |
16 | 16 | from tempfile import TemporaryDirectory |
17 | | -from typing import Union |
| 17 | +from typing import List |
18 | 18 |
|
19 | 19 | from ted_sws.core.model.manifestation import METSManifestation |
20 | 20 | from ted_sws.core.model.notice import Notice |
21 | | -from ted_sws.data_manager.adapters.notice_repository import NoticeRepositoryABC |
22 | 21 | from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata |
23 | 22 | from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import \ |
24 | 23 | XMLManifestationMetadataExtractor |
25 | | -from ted_sws.notice_packager.adapters.archiver import ArchiverFactory, ARCHIVE_ZIP_FORMAT, PATH_TYPE, \ |
26 | | - LIST_TYPE as PATH_LIST_TYPE |
| 24 | +from ted_sws.notice_packager.adapters.archiver import ZipArchiver |
27 | 25 | from ted_sws.notice_packager.adapters.template_generator import TemplateGenerator |
28 | 26 | from ted_sws.notice_packager.model.metadata import ACTION_CREATE |
29 | 27 | from ted_sws.notice_packager.services.metadata_transformer import MetadataTransformer |
|
35 | 33 | FILE_TMD_FORMAT = "techMDID001.tmd.rdf" |
36 | 34 | FILE_RDF_FORMAT = "{notice_id}.ttl" |
37 | 35 |
|
38 | | -NOTICE_METADATA_TYPE = ExtractedMetadata |
39 | | -IN_DATA_TYPE = Union[NOTICE_METADATA_TYPE, Notice, str] |
40 | | -RDF_CONTENT_TYPE = Union[str, bytes] |
41 | | - |
42 | | - |
43 | | -def create_notice_package(in_data: IN_DATA_TYPE, rdf_content: RDF_CONTENT_TYPE = None, |
44 | | - extra_files: PATH_LIST_TYPE = None, action: str = ACTION_CREATE, |
45 | | - save_to: PATH_TYPE = None, notice_repository: NoticeRepositoryABC = None) -> str: |
46 | | - """ |
47 | | - :param in_data: can be Notice object, ExtractedMetadata object or notice_id string |
48 | | - :param rdf_content: base64 encoded bytes content of rdf file |
49 | | - :param extra_files: additional files paths to be added to archive |
50 | | - :param action: |
51 | | - :param save_to: can be: |
52 | | - None - base64 encoded string of archive content, |
53 | | - "" - temporary archive path, |
54 | | - string (path to archive: containing archive name or just the folder) - archive path |
55 | | - :param notice_repository: |
56 | | - :return: base64 encoded archive or path to archive |
57 | | - """ |
58 | | - |
59 | | - tmp_dir = TemporaryDirectory() |
60 | | - tmp_dir_path = Path(tmp_dir.name) |
61 | | - |
62 | | - notice_packager = NoticePackager(in_data, action, tmp_dir_path, notice_repository) |
63 | | - notice_packager.add_template_files() |
64 | | - notice_packager.add_rdf_content(rdf_content) |
65 | | - notice_packager.add_extra_files(extra_files) |
66 | | - |
67 | | - return notice_packager.pack(save_to) |
68 | | - |
69 | 36 |
|
def package_notice(notice: Notice) -> Notice:
    """
        This function generates the METS package of a notice and sets it as the notice METSManifestation.

    :param notice: notice providing the xml_manifestation (source of the package metadata)
                   and the distilled_rdf_manifestation (RDF content added to the package)
    :return: the same notice object, with its METS manifestation set
    """

    notice_packager = NoticePackager(notice, ACTION_CREATE)
    try:
        notice_packager.add_template_files()
        notice_packager.add_rdf_content(notice.distilled_rdf_manifestation.object_data.encode("utf-8"))
        mets_manifestation_content = notice_packager.pack()
    finally:
        # The packager stages its files in a TemporaryDirectory; remove it right away
        # instead of leaving the cleanup to garbage collection.
        notice_packager.tmp_dir.cleanup()
    notice.set_mets_manifestation(mets_manifestation=METSManifestation(object_data=mets_manifestation_content))
    return notice
79 | 48 |
|
80 | 49 |
|
81 | | -def package_notice_and_save_to(notice: Notice, save_to: PATH_TYPE = None) -> str: |
82 | | - """ |
83 | | - This function package a Notice to save_to location. |
84 | | - """ |
85 | | - return create_notice_package(in_data=notice, |
86 | | - rdf_content=notice.distilled_rdf_manifestation.object_data.encode( |
87 | | - "utf-8"), |
88 | | - save_to=save_to) |
89 | | - |
90 | | - |
class NoticePackager:
    """
        This class will manage the steps/methods of notice packager creation.

        Files are staged inside a private temporary directory, collected in ``self.files``
        and finally archived by :meth:`pack`, which returns the archive base64 encoded.
        The temporary directory can be released explicitly with :meth:`cleanup`
        or by using the packager as a context manager.
    """

    def __init__(self, notice: "Notice", action: str):
        """

        :param notice: notice whose xml_manifestation is used to extract the package metadata
        :param action: action forwarded to the metadata transformer (e.g. ACTION_CREATE)
        """
        notice_metadata: ExtractedMetadata = XMLManifestationMetadataExtractor(
            xml_manifestation=notice.xml_manifestation).to_metadata()
        metadata_transformer = MetadataTransformer(notice_metadata)
        self.template_metadata = metadata_transformer.template_metadata(action=action)
        self.notice_id = self.template_metadata.notice.id
        self.notice_action = self.template_metadata.notice.action.type
        self.files: List[pathlib.Path] = []
        # The staging directory is created last, so that a failure while extracting
        # or transforming the metadata does not leave a directory behind.
        self.tmp_dir = TemporaryDirectory()
        self.tmp_dir_path = Path(self.tmp_dir.name)

    def __enter__(self):
        """
        :return: the packager itself, so it can be used in a ``with`` statement
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Release the staging directory when leaving the ``with`` block; exceptions are not suppressed.
        """
        self.cleanup()

    def cleanup(self):
        """
        Remove the temporary staging directory together with every file written into it.
        Calling it more than once is harmless.

        :return:
        """
        self.tmp_dir.cleanup()

    def add_template_files(self):
        """
        Render the three package templates into the staging directory and register them for packing.

        :return:
        """
        encoding_type = "utf-8"
        file_mets_xml_dmd_rdf = self.tmp_dir_path / FILE_METS_XML_FORMAT.format(notice_id=self.notice_id)
        file_tmd_rdf = self.tmp_dir_path / FILE_TMD_FORMAT.format()
        file_mets2action_mets_xml = self.tmp_dir_path / FILE_METS_ACTION_FORMAT.format(
            work_identifier=self.template_metadata.work.identifier,
            action=self.notice_action
        )
        file_mets_xml_dmd_rdf.write_text(TemplateGenerator.mets_xml_dmd_rdf_generator(self.template_metadata),
                                         encoding=encoding_type)
        file_tmd_rdf.write_text(TemplateGenerator.tmd_rdf_generator(self.template_metadata),
                                encoding=encoding_type)
        file_mets2action_mets_xml.write_text(TemplateGenerator.mets2action_mets_xml_generator(self.template_metadata),
                                             encoding=encoding_type)
        template_files = [
            file_mets_xml_dmd_rdf,
            file_tmd_rdf,
            file_mets2action_mets_xml
        ]
        # Template files come first; files registered before this call are kept
        # (instead of being dropped) and a repeated call does not duplicate entries.
        self.files = template_files + [file for file in self.files if file not in template_files]

    def add_rdf_content(self, rdf_content: bytes):
        """
        Write the RDF content into the staging directory and register the file for packing.

        :param rdf_content: content of the RDF file as bytes; when it is valid base64 it is decoded first,
                            otherwise it is written unchanged. None is ignored.
        :return:
        """
        if rdf_content is None:
            return
        try:
            rdf_content_bytes = base64.b64decode(rdf_content, validate=True)
        except binascii.Error:
            # not base64 encoded: use the content as it is
            rdf_content_bytes = rdf_content
        rdf_file_path = self.tmp_dir_path / FILE_RDF_FORMAT.format(notice_id=self.notice_id)
        rdf_file_path.write_bytes(rdf_content_bytes)
        self.files.append(rdf_file_path)

    def add_extra_files(self, extra_files: List[pathlib.Path]):
        """
        Register additional, already existing files for packing.

        :param extra_files: paths of the files to be added to the archive; None is ignored
        :return:
        """
        if extra_files is None:
            return
        self.files.extend(extra_files)

    def get_archive_name(self) -> str:
        """
        :return: archive file name built from the work identifier and the notice action type
        """
        return ARCHIVE_NAME_FORMAT.format(
            work_identifier=self.template_metadata.work.identifier,
            action=self.template_metadata.notice.action.type
        )

    def pack(self) -> str:
        """
        Archive all registered files inside the staging directory.

        :return: base64 encoded content of the archive, as a string
        """
        archiver = ZipArchiver()
        archive_path = self.tmp_dir_path / self.get_archive_name()
        package_path = archiver.process_archive(archive_path, self.files)
        # Path(...) accepts both a path object and a plain string returned by the archiver
        raw_archive_content = Path(package_path).read_bytes()
        return base64.b64encode(raw_archive_content).decode("utf-8")
0 commit comments