Skip to content

Commit 2f56057

Browse files
committed
Fix: --limit fetching per record type
1 parent 1dc68e8 commit 2f56057

1 file changed

Lines changed: 18 additions & 16 deletions

File tree

scripts/1-fetch/museums_victoria_fetch.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
HEADER1_COUNT = ["TOOL IDENTIFIER", "COUNT"]
4444
HEADER2_MEDIA = ["TOOL IDENTIFIER", "MEDIA TYPE", "COUNT"]
4545
HEADER3_RECORD = ["TOOL IDENTIFIER", "RECORD TYPE", "COUNT"]
46+
PER_PAGE = 100
4647
QUARTER = os.path.basename(PATHS["data_quarter"])
4748
RECORD_TYPES = [
4849
"article",
@@ -161,27 +162,23 @@ def fetch_museums_victoria_data(args, session):
161162
record_counts = defaultdict(lambda: defaultdict(int))
162163
media_counts = defaultdict(lambda: defaultdict(int))
163164
licences_count = defaultdict(int)
164-
records_processed = 0
165165

166166
# Iterate through each record type
167167
for record_type in RECORD_TYPES:
168+
records_processed = 0
168169
current_page = 1
169170
total_pages = None
170-
per_page = 100
171-
if args.limit is not None:
172-
per_page = args.limit
173-
if records_processed >= args.limit:
174-
LOGGER.info(
175-
f"Limit Reached: {records_processed} processed. "
176-
f"Skipping remaining record types."
177-
)
178-
break
171+
# e.g. --limit 300 with PER_PAGE 100 ==> per_page = 100 (capped at PER_PAGE)
172+
# e.g. --limit 20 with PER_PAGE 100 ==> per_page = 20 (limit is smaller)
173+
per_page = min(PER_PAGE, args.limit) if args.limit else PER_PAGE
174+
# if args.limit is not None:
175+
# if records_processed >= args.limit:
176+
# LOGGER.info(
177+
# f"Limit Reached: {records_processed} processed. "
178+
# f"Skipping remaining record types."
179+
# )
180+
# break
179181

180-
LOGGER.info(
181-
f"fetching page {current_page} of {record_type}s "
182-
f"(records {(current_page * per_page) - per_page}-"
183-
f"{current_page * per_page})"
184-
)
185182
while True:
186183
# 1. Construct the API query parameters
187184
params = {
@@ -190,6 +187,11 @@ def fetch_museums_victoria_data(args, session):
190187
"perpage": per_page,
191188
"recordtype": record_type,
192189
}
190+
LOGGER.info(
191+
f"fetching page {current_page} of {record_type}s "
192+
f"(records {(current_page * per_page) - per_page}-"
193+
f"{current_page * per_page})"
194+
)
193195
try:
194196
r = session.get(BASE_URL, params=params, timeout=30)
195197
r.raise_for_status()
@@ -222,7 +224,7 @@ def fetch_museums_victoria_data(args, session):
222224
headers = data.get("headers", {})
223225
total_pages = int(headers.get("totalResults", "0"))
224226

225-
if args.limit is not None and records_processed >= per_page:
227+
if args.limit is not None and records_processed >= args.limit:
226228
break
227229
current_page += 1
228230

0 commit comments

Comments
 (0)