Skip to content

Commit 48cc114

Browse files
authored
Merge pull request #524 from OP-TED/feature/TED4-133
Update ted_api.py
2 parents e4b67c3 + abbe119 commit 48cc114

1 file changed

Lines changed: 42 additions & 30 deletions

File tree

ted_sws/notice_fetcher/adapters/ted_api.py

Lines changed: 42 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,13 @@
22
import time
33
from datetime import date
44
from http import HTTPStatus
5-
from typing import List, Generator
5+
from typing import List, Generator, Callable, Optional
66

77
import requests
8+
from requests import Response
89

910
from ted_sws import config
10-
from ted_sws.event_manager.services.log import log_error
11+
from ted_sws.event_manager.services.log import log_error, log_warning
1112
from ted_sws.notice_fetcher.adapters.ted_api_abc import TedAPIAdapterABC, RequestAPI
1213

1314
DOCUMENTS_PER_PAGE = 100
@@ -30,6 +31,34 @@
3031
ENGLISH_LANGUAGE_CONTENT_KEY = "ENG"
3132
DOCUMENT_NOTICE_ID_KEY = "ND"
3233

34+
# Extra header so the TED API can attribute traffic to this pipeline.
CUSTOM_HEADER = {'User-Agent': 'TED-SWS-Pipeline-Fetcher'}
MAX_RETRIES = 5
DEFAULT_BACKOFF_FACTOR = 1


def execute_request_with_retries(request_lambda: Callable,
                                 max_retries: int = MAX_RETRIES,
                                 backoff_factor: float = DEFAULT_BACKOFF_FACTOR) -> Response:
    """Invoke *request_lambda* until it returns HTTP 200 or the retry budget is spent.

    A linear back-off is applied between attempts: the n-th retry sleeps
    ``backoff_factor * n`` seconds. The last response received is returned in
    every case, so callers must still inspect its status code themselves.

    :param request_lambda: zero-argument callable that performs the HTTP request
        and returns a ``requests.Response``.
    :param max_retries: number of additional attempts allowed after the first call.
    :param backoff_factor: multiplier (in seconds) for the linear sleep between attempts.
    :return: the final response, successful or not.
    """
    attempt = 0
    while True:
        response = request_lambda()
        if response.status_code == HTTPStatus.OK:
            return response
        if attempt >= max_retries:
            log_warning(f"Max retries exceeded, retried {max_retries} times!")
            return response
        attempt += 1
        delay = backoff_factor * attempt
        log_warning(f"Request returned status code {response.status_code}, retrying in {delay} seconds!")
        time.sleep(delay)
54+
55+
56+
def get_configured_custom_headers(custom_header: Optional[dict] = None) -> dict:
    """Build the default ``requests`` headers, optionally merged with *custom_header*.

    :param custom_header: mapping of header names to values; its entries override
        the defaults. A falsy value (``None`` or an empty dict) leaves the
        defaults untouched.
    :return: a header mapping ready to pass as ``headers=`` to ``requests`` calls.
    """
    merged_headers = requests.utils.default_headers()
    if custom_header:
        merged_headers.update(custom_header)
    return merged_headers
61+
3362

3463
class TedRequestAPI(RequestAPI):
3564

@@ -40,15 +69,9 @@ def __call__(self, api_url: str, api_query: dict) -> dict:
4069
:param api_query:
4170
:return: dict
4271
"""
43-
44-
response = requests.post(api_url, json=api_query)
45-
try_again_request_count = 0
46-
while response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
47-
try_again_request_count += 1
48-
time.sleep(try_again_request_count * 0.1)
49-
response = requests.post(api_url, json=api_query)
50-
if try_again_request_count > 5:
51-
break
72+
headers = get_configured_custom_headers(CUSTOM_HEADER)
73+
response = execute_request_with_retries(
74+
request_lambda=lambda: requests.post(api_url, json=api_query, headers=headers))
5275
if response.ok:
5376
response_content = json.loads(response.text)
5477
return response_content
@@ -108,14 +131,9 @@ def _retrieve_document_content(self, document_content: dict) -> str:
108131
log_error(exception_message)
109132
raise Exception(exception_message)
110133
xml_document_content_link = xml_links[MULTIPLE_LANGUAGE_CONTENT_KEY]
111-
response = requests.get(xml_document_content_link)
112-
try_again_request_count = 0
113-
while response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
114-
try_again_request_count += 1
115-
time.sleep(try_again_request_count * 0.1)
116-
response = requests.get(xml_document_content_link)
117-
if try_again_request_count > 5:
118-
break
134+
headers = get_configured_custom_headers(CUSTOM_HEADER)
135+
response = execute_request_with_retries(
136+
request_lambda=lambda: requests.get(xml_document_content_link, headers=headers))
119137
if response.ok:
120138
return response.text
121139
else:
@@ -142,17 +160,11 @@ def get_generator_by_query(self, query: dict, result_fields: dict = None, load_c
142160
response_body = self.request_api(api_url=self.ted_api_url, api_query=query)
143161
documents_content += response_body[RESPONSE_RESULTS]
144162

145-
for document_content in documents_content:
146-
if load_content:
147-
document_content[DOCUMENT_CONTENT] = self._retrieve_document_content(document_content)
148-
del document_content[LINKS_TO_CONTENT_KEY]
149-
yield document_content
150-
else:
151-
for document_content in documents_content:
152-
if load_content:
153-
document_content[DOCUMENT_CONTENT] = self._retrieve_document_content(document_content)
154-
del document_content[LINKS_TO_CONTENT_KEY]
155-
yield document_content
163+
for document_content in documents_content:
164+
if load_content:
165+
document_content[DOCUMENT_CONTENT] = self._retrieve_document_content(document_content)
166+
del document_content[LINKS_TO_CONTENT_KEY]
167+
yield document_content
156168

157169
def get_by_query(self, query: dict, result_fields: dict = None, load_content: bool = True) -> List[dict]:
158170
"""

0 commit comments

Comments (0)