1111import base64
1212import os
1313from pathlib import Path
14+ from typing import List
1415
1516import click
17+ from pymongo import MongoClient
1618
19+ from ted_sws import config
1720from ted_sws .core .adapters .cmd_runner import CmdRunner as BaseCmdRunner
1821from ted_sws .core .model .manifestation import XMLManifestation
1922from ted_sws .core .model .notice import Notice
23+ from ted_sws .data_manager .adapters .notice_repository import NoticeRepository
24+ from ted_sws .event_manager .adapters .log import LOG_WARN_TEXT
2025from ted_sws .notice_metadata_processor .services .xml_manifestation_metadata_extractor import \
2126 XMLManifestationMetadataExtractor
22- from ted_sws .notice_packager .services .metadata_transformer import MetadataTransformer
23- from ted_sws .notice_packager .services .notice_packager import create_notice_package
2427from ted_sws .notice_packager import DEFAULT_NOTICE_PACKAGE_EXTENSION
28+ from ted_sws .notice_packager .services .metadata_transformer import MetadataTransformer
29+ from ted_sws .notice_packager .services .notice_packager import create_notice_package , package_notice_and_save_to
2530
2631CMD_NAME = "CMD_BULK_PACKAGER"
2732DEFAULT_FILES_COUNT : int = 3000
@@ -35,47 +40,60 @@ class PackageNotice(Notice):
3540
3641
3742class CmdRunner (BaseCmdRunner ):
38- def __init__ (self , rdf_files_folder , output_folder , pkgs_count : int ):
43+ def __init__ (self , rdf_files_folder , output_folder , pkgs_count : int , notice_ids : List = None ,
44+ mongodb_client = MongoClient (config .MONGO_DB_AUTH_URL )):
3945 super ().__init__ (name = CMD_NAME )
40- self .rdf_files_path = Path (os .path .realpath (rdf_files_folder ))
4146 self .output_path = Path (os .path .realpath (output_folder ))
42- self .pkgs_count = pkgs_count
43- if not self .rdf_files_path .is_dir ():
44- error_msg = f"No such folder :: [{ rdf_files_folder } ]"
45- self .log_failed_msg (error_msg )
46- raise FileNotFoundError (error_msg )
47+ self .notices = None
48+ if notice_ids :
49+ self .log (LOG_WARN_TEXT .format ("Notices: " ) + str (notice_ids ))
50+ self .notice_repository = NoticeRepository (mongodb_client = mongodb_client )
51+ self .notices = []
52+ for notice_id in notice_ids :
53+ self .notices .append (self .notice_repository .get (reference = notice_id ))
54+ else :
55+ self .rdf_files_path = Path (os .path .realpath (rdf_files_folder ))
56+ self .pkgs_count = pkgs_count
57+ if not self .rdf_files_path .is_dir ():
58+ error_msg = f"No such folder :: [{ rdf_files_folder } ]"
59+ self .log_failed_msg (error_msg )
60+ raise FileNotFoundError (error_msg )
61+
62+ self .output_path .mkdir (parents = True , exist_ok = True )
4763
4864 def run_cmd (self ):
4965 error = None
5066 try :
51- self .output_path .mkdir (parents = True , exist_ok = True )
52- rdf_files = [Path (str (f_path )) for f in os .listdir (self .rdf_files_path ) if
53- os .path .isfile (f_path := os .path .join (self .rdf_files_path , f ))]
54- rdf_files_count = len (rdf_files )
55- base_idx = 100000
56- year = 2021
57-
58- for i in range (self .pkgs_count ):
59- rdf_idx = i % rdf_files_count
60- rdf_file_path = rdf_files [rdf_idx ]
61- notice_id = str (base_idx + i ) + "_" + str (year )
62- pkg_name = notice_id
63- self .generate_package (notice_id , self .output_path , rdf_file_path , pkg_name )
67+ if self .notices :
68+ self .log ("Saving packages to " + str (self .output_path ))
69+ for notice in self .notices :
70+ package_notice_and_save_to (notice = notice ,
71+ save_to = self .output_path )
72+ else :
73+ rdf_files = [Path (str (f_path )) for f in os .listdir (self .rdf_files_path ) if
74+ os .path .isfile (f_path := os .path .join (self .rdf_files_path , f ))]
75+ rdf_files_count = len (rdf_files )
76+ base_idx = 100000
77+ year = 2021
78+
79+ for i in range (self .pkgs_count ):
80+ rdf_idx = i % rdf_files_count
81+ rdf_file_path = rdf_files [rdf_idx ]
82+ notice_id = str (base_idx + i ) + "_" + str (year )
83+ self .generate_package (notice_id , self .output_path , rdf_file_path )
6484 except Exception as e :
6585 error = e
6686
6787 return self .run_cmd_result (error )
6888
6989 @classmethod
70- def generate_package (cls , notice_id , output_path , rdf_file_path , pkg_name ):
90+ def generate_package (cls , notice_id , output_path , rdf_file_path ):
7191
7292 with open (rdf_file_path , "r" ) as f :
7393 rdf_content = f .read ()
7494
7595 encoded_rdf_content = base64 .b64encode (bytes (rdf_content , 'utf-8' ))
7696
77- output_file = output_path / (pkg_name + DEFAULT_NOTICE_PACKAGE_EXTENSION )
78-
7997 notice = PackageNotice (ted_id = notice_id )
8098 notice_metadata = XMLManifestationMetadataExtractor (
8199 xml_manifestation = notice .xml_manifestation ).to_metadata ()
@@ -84,24 +102,26 @@ def generate_package(cls, notice_id, output_path, rdf_file_path, pkg_name):
84102 create_notice_package (
85103 notice_metadata ,
86104 rdf_content = encoded_rdf_content ,
87- save_to = output_file
105+ save_to = output_path
88106 )
89107
90108
def run(rdf_files_count=None, output_folder=None, pkgs_count=None, notice_id=None,
        mongodb_client=None):
    """
    Build a CmdRunner from the given arguments and run it.

    :param rdf_files_count: passed to CmdRunner as its RDF files folder
        (the name is kept for backward compatibility)
    :param output_folder: folder for the generated packages; defaults to "."
        like the command line option
    :param pkgs_count: number of packages to generate; defaults to DEFAULT_FILES_COUNT
    :param notice_id: optional iterable of notice ids to package instead of RDF files
    :param mongodb_client: optional MongoDB client; when omitted, one is created
        from config.MONGO_DB_AUTH_URL only if notice ids are given
    """
    notice_ids = list(notice_id or [])
    # Create the client on demand rather than as a default argument, which
    # would be evaluated once when this module is imported.
    if mongodb_client is None and notice_ids:
        mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
    # Mirror the CLI defaults so a programmatic call with omitted values works.
    if output_folder is None:
        output_folder = "."
    if pkgs_count is None:
        pkgs_count = DEFAULT_FILES_COUNT
    cmd = CmdRunner(rdf_files_count, output_folder, pkgs_count, notice_ids, mongodb_client)
    cmd.run()
94113
95114
@click.command()
@click.argument('rdf-files-folder', nargs=1, required=False)
@click.argument('pkgs-count', nargs=1, type=click.INT, required=False, default=DEFAULT_FILES_COUNT)
@click.option('--output-folder', required=False, default=".")
@click.option('--notice-id', required=False, multiple=True, default=None)
def main(rdf_files_folder, pkgs_count, output_folder, notice_id):
    """
    Generates test METS packages
    """
    # Command line entry point: forwards the parsed arguments to run().
    # Either RDF-FILES-FOLDER or one or more --notice-id values must be given;
    # fail with a usage message instead of a traceback when both are absent.
    if not notice_id and rdf_files_folder is None:
        raise click.UsageError("Provide RDF-FILES-FOLDER or at least one --notice-id")
    run(rdf_files_folder, output_folder, pkgs_count, notice_id)
106126
107127if __name__ == '__main__' :
0 commit comments