4343HEADER1_COUNT = ["TOOL IDENTIFIER" , "COUNT" ]
4444HEADER2_MEDIA = ["TOOL IDENTIFIER" , "MEDIA TYPE" , "COUNT" ]
4545HEADER3_RECORD = ["TOOL IDENTIFIER" , "RECORD TYPE" , "COUNT" ]
46+ PER_PAGE = 100
4647QUARTER = os .path .basename (PATHS ["data_quarter" ])
4748RECORD_TYPES = [
4849 "article" ,
@@ -161,27 +162,23 @@ def fetch_museums_victoria_data(args, session):
161162 record_counts = defaultdict (lambda : defaultdict (int ))
162163 media_counts = defaultdict (lambda : defaultdict (int ))
163164 licences_count = defaultdict (int )
164- records_processed = 0
165165
166166 # Iterate through each record type
167167 for record_type in RECORD_TYPES :
168+ records_processed = 0
168169 current_page = 1
169170 total_pages = None
170- per_page = 100
171- if args .limit is not None :
172- per_page = args .limit
173- if records_processed >= args .limit :
174- LOGGER .info (
175- f"Limit Reached: { records_processed } processed. "
176- f"Skipping remaining record types."
177- )
178- break
171+ # limit=300, PER_PAGE=100 ==> per_page=100
172+ # limit=20, PER_PAGE=100 ==> per_page=20
173+ per_page = min (PER_PAGE , args .limit ) if args .limit else PER_PAGE
174+ # if args.limit is not None:
175+ # if records_processed >= args.limit:
176+ # LOGGER.info(
177+ # f"Limit Reached: {records_processed} processed. "
178+ # f"Skipping remaining record types."
179+ # )
180+ # break
179181
180- LOGGER .info (
181- f"fetching page { current_page } of { record_type } s "
182- f"(records { (current_page * per_page ) - per_page } -"
183- f"{ current_page * per_page } )"
184- )
185182 while True :
186183 # 1. Construct the API query parameters
187184 params = {
@@ -190,6 +187,11 @@ def fetch_museums_victoria_data(args, session):
190187 "perpage" : per_page ,
191188 "recordtype" : record_type ,
192189 }
190+ LOGGER .info (
191+ f"fetching page { current_page } of { record_type } s "
192+ f"(records { (current_page * per_page ) - per_page } -"
193+ f"{ current_page * per_page } )"
194+ )
193195 try :
194196 r = session .get (BASE_URL , params = params , timeout = 30 )
195197 r .raise_for_status ()
@@ -222,7 +224,7 @@ def fetch_museums_victoria_data(args, session):
222224 headers = data .get ("headers" , {})
223225 total_pages = int (headers .get ("totalResults" , "0" ))
224226
225- if args .limit is not None and records_processed >= per_page :
227+ if args .limit is not None and records_processed >= args . limit :
226228 break
227229 current_page += 1
228230
0 commit comments