1818)
1919from databento .common .validation import validate_enum , validate_semantic_string
2020from databento .historical .api import API_VERSION
21+ from databento .historical .api .metadata import MetadataHttpAPI
2122from databento .historical .http import BentoHttpAPI
2223
2324
# Byte threshold above which a streaming request triggers a BentoWarning;
# requests estimated at or above this size should use batch download instead.
WARN_REQUEST_SIZE: int = 5 * 10 ** 9  # 5 GB
26+
27+
2428class TimeSeriesHttpAPI (BentoHttpAPI ):
2529 """
2630 Provides request methods for the time series HTTP API endpoints.
@@ -127,10 +131,12 @@ def get_range(
127131 stype_in_valid = validate_enum (stype_in , SType , "stype_in" )
128132 symbols_list = optional_symbols_list_to_string (symbols , stype_in_valid )
129133 schema_valid = validate_enum (schema , Schema , "schema" )
134+ start_valid = datetime_to_string (start )
135+ end_valid = optional_datetime_to_string (end )
130136 params : List [Tuple [str , Optional [str ]]] = [
131137 ("dataset" , validate_semantic_string (dataset , "dataset" )),
132- ("start" , datetime_to_string ( start ) ),
133- ("end" , optional_datetime_to_string ( end ) ),
138+ ("start" , start_valid ),
139+ ("end" , end_valid ),
134140 ("symbols" , symbols_list ),
135141 ("schema" , str (schema_valid )),
136142 ("stype_in" , str (stype_in_valid )),
@@ -144,10 +150,12 @@ def get_range(
144150 params .append (("limit" , str (limit )))
145151
146152 self ._pre_check_data_size (
147- symbols = symbols ,
153+ dataset = dataset ,
154+ stype_in = stype_in_valid ,
155+ symbols = symbols_list ,
148156 schema = schema_valid ,
149- start = start ,
150- end = end ,
157+ start = start_valid ,
158+ end = end_valid ,
151159 limit = limit ,
152160 )
153161
@@ -267,10 +275,12 @@ async def get_range_async(
267275 stype_in_valid = validate_enum (stype_in , SType , "stype_in" )
268276 symbols_list = optional_symbols_list_to_string (symbols , stype_in_valid )
269277 schema_valid = validate_enum (schema , Schema , "schema" )
278+ start_valid = datetime_to_string (start )
279+ end_valid = optional_datetime_to_string (end )
270280 params : List [Tuple [str , Optional [str ]]] = [
271281 ("dataset" , validate_semantic_string (dataset , "dataset" )),
272- ("start" , datetime_to_string ( start ) ),
273- ("end" , optional_datetime_to_string ( end ) ),
282+ ("start" , start_valid ),
283+ ("end" , end_valid ),
274284 ("symbols" , symbols_list ),
275285 ("schema" , str (schema_valid )),
276286 ("stype_in" , str (stype_in_valid )),
@@ -283,10 +293,12 @@ async def get_range_async(
283293 params .append (("limit" , str (limit )))
284294
285295 self ._pre_check_data_size (
286- symbols = symbols ,
296+ dataset = dataset ,
297+ stype_in = stype_in_valid ,
298+ symbols = symbols_list ,
287299 schema = schema_valid ,
288- start = start ,
289- end = end ,
300+ start = start_valid ,
301+ end = end_valid ,
290302 limit = limit ,
291303 )
292304
@@ -308,59 +320,100 @@ async def get_range_async(
308320 writer .seek (0 ) # rewind for read
309321 return DBNStore .from_bytes (writer .read ())
310322
311- def _pre_check_data_size ( # noqa (prefer not to make static)
323+ def _pre_check_data_size (
312324 self ,
313- symbols : Optional [Union [List [str ], str ]],
325+ dataset : str ,
326+ symbols : str ,
314327 schema : Schema ,
315- start : Optional [Union [pd .Timestamp , date , str , int ]],
316- end : Optional [Union [pd .Timestamp , date , str , int ]],
328+ start : str ,
329+ end : Optional [str ],
330+ stype_in : SType ,
317331 limit : Optional [int ],
318332 ) -> None :
319- if limit and limit < 10 ** 7 :
333+ if _is_size_limited (
334+ schema = schema ,
335+ limit = limit ,
336+ ):
320337 return
321338
322- # Use heuristics to check ballpark data size
323- if (
324- _is_large_data_size_schema ( schema )
325- or _is_greater_than_one_day ( start , end )
326- or _is_large_number_of_symbols ( symbols )
339+ if _is_period_limited (
340+ schema = schema ,
341+ symbols = symbols ,
342+ start = start ,
343+ end = end ,
327344 ):
328- warnings .warn (
329- message = "The size of this streaming request is estimated "
330- "to be 5 GB or greater.\n We recommend breaking your request "
331- "into smaller requests, or submitting a batch download request.\n "
332- "This warning can be suppressed: "
333- "https://docs.python.org/3/library/warnings.html" ,
334- category = BentoWarning ,
335- stacklevel = 3 , # This makes the error happen in user code
336- )
337-
345+ return
338346
339- def _is_large_number_of_symbols (symbols : Optional [Union [List [str ], str ]]) -> bool :
340- if not symbols :
341- return True # Full universe
347+ metadata_api = MetadataHttpAPI (
348+ key = self ._key ,
349+ gateway = self ._gateway ,
350+ )
351+ request_size = metadata_api .get_billable_size (
352+ dataset = dataset ,
353+ start = start ,
354+ end = end ,
355+ symbols = symbols ,
356+ schema = schema ,
357+ stype_in = stype_in ,
358+ limit = limit ,
359+ )
342360
343- if isinstance ( symbols , str ) :
344- symbols = symbols . split ( "," )
361+ if request_size < WARN_REQUEST_SIZE :
362+ return
345363
346- if len (symbols ) >= 500 :
347- return True
364+ warnings .warn (
365+ message = """The size of this streaming request is greater than 5GB.
366+ It is recommended to submit a batch download request for large volumes
367+ of data, or break this request into smaller requests.
368+ This warning can be suppressed:
369+ https://docs.python.org/3/library/warnings.html""" ,
370+ category = BentoWarning ,
371+ stacklevel = 3 , # This makes the error happen in user code
372+ )
348373
349- return False
350374
def _is_size_limited(
    schema: Schema,
    limit: Optional[int],
    max_size: int = WARN_REQUEST_SIZE,
) -> bool:
    """
    Return True when the record ``limit`` alone bounds the request
    below ``max_size`` bytes.
    """
    if limit is None:
        # No limit given: the record count cannot bound the size.
        return False
    # Upper bound: every record at this schema's fixed record size.
    return limit * schema.get_record_type().size_hint() < max_size
354385
355386
def _is_period_limited(
    schema: Schema,
    symbols: str,
    start: str,
    end: Optional[str],
    max_size: int = WARN_REQUEST_SIZE,
) -> bool:
    """
    Return True when an aggregated schema over the requested closed
    period is estimated to stay below ``max_size`` bytes.

    Only the OHLCV schemas and definitions emit at most one record per
    symbol per aggregation interval, so their size can be bounded from
    the period alone; all other schemas return False.
    """
    if end is None:
        # Open-ended range: the period cannot be bounded.
        return False

    aggregated_schemas = (
        Schema.OHLCV_1S,
        Schema.OHLCV_1M,
        Schema.OHLCV_1H,
        Schema.OHLCV_1D,
        Schema.DEFINITION,
    )
    if schema not in aggregated_schemas:
        return False

    dt_start = pd.to_datetime(start, utc=True)
    dt_end = pd.to_datetime(end, utc=True)

    # Seconds covered by one record; default scale is one day for
    # ohlcv-1d and definition.
    seconds_per_record = {
        Schema.OHLCV_1S: 1,
        Schema.OHLCV_1M: 60,
        Schema.OHLCV_1H: 60 * 60,
    }.get(schema, 60 * 60 * 24)

    num_symbols = len(symbols.split(","))
    num_records = num_symbols * (dt_end - dt_start).total_seconds() // seconds_per_record
    return num_records * schema.get_record_type().size_hint() < max_size
0 commit comments