MOD: Improve Python client path typing+validation

cjdsellers · cjdsellers · commit 4c053d2cfcdf · 2023-02-22T22:20:40.000Z
diff --git a/databento/common/bento.py b/databento/common/bento.py
@@ -1,6 +1,7 @@
 import abc
 import datetime as dt
 from io import BytesIO
+from os import PathLike
 from pathlib import Path
 from typing import IO, TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
 
@@ -53,14 +54,14 @@ class FileDataSource(DataSource):
         The name of the file.
     nbytes : int
         The size of the data in bytes; equal to the file size.
-    path : Path
+    path : PathLike or str
         The path of the file.
     reader : IO[bytes]
         A `BufferedReader` for this file-backed data.
 
     """
 
-    def __init__(self, source: Union[Path, str]):
+    def __init__(self, source: Union[PathLike[str], str]):
         self._path = Path(source)
 
         if not self._path.is_file() or not self._path.exists():
@@ -615,7 +616,7 @@ def symbols(self) -> List[str]:
         return self._metadata["symbols"]
 
     @classmethod
-    def from_file(cls, path: Union[Path, str]) -> "Bento":
+    def from_file(cls, path: Union[PathLike[str], str]) -> "Bento":
         """
         Load the data from a DBN file at the given path.
 
diff --git a/databento/common/validation.py b/databento/common/validation.py
@@ -1,11 +1,39 @@
 from enum import Enum
+from os import PathLike
+from pathlib import Path
 from typing import Optional, Type, TypeVar, Union
 from urllib.parse import urlsplit, urlunsplit
 
 
 E = TypeVar("E", bound=Enum)
 
 
+def validate_path(value: Union["PathLike[str]", str], param: str) -> Path:
+    """
+    Convert the given value to a `Path`, raising `TypeError` if it cannot be.
+
+    Parameters
+    ----------
+    value : PathLike or str
+        The value to validate.
+    param : str
+        The name of the parameter being validated (for any error message).
+
+    Returns
+    -------
+    Path
+        The given value converted to a `pathlib.Path`.
+
+    """
+    try:
+        return Path(value)
+    except TypeError as e:
+        raise TypeError(
+            f"The `{param}` was not a valid path type. "
+            "Use any of [str, os.PathLike].",
+        ) from e
+
+
 def validate_enum(
     value: object,
     enum: Type[E],
@@ -37,12 +65,12 @@ def validate_enum(
     """
     try:
         return enum(value)
-    except ValueError as exc:
+    except ValueError as e:
         valid = list(map(str, enum))
         raise ValueError(
             f"The `{param}` was not a valid value of {enum}, was '{value}'. "
             f"Use any of {valid}.",
-        ) from exc
+        ) from e
 
 
 def validate_maybe_enum(
diff --git a/databento/historical/api/batch.py b/databento/historical/api/batch.py
@@ -1,5 +1,6 @@
 import os
 from datetime import date
+from os import PathLike
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -23,7 +24,7 @@
     optional_symbols_list_to_string,
     optional_values_list_to_string,
 )
-from databento.common.validation import validate_enum
+from databento.common.validation import validate_enum, validate_path
 from databento.historical.api import API_VERSION
 from databento.historical.http import (
     BentoHttpAPI,
@@ -212,7 +213,7 @@ def list_files(self, job_id: str) -> List[Dict[str, Any]]:
 
     def download(
         self,
-        output_dir: Union[Path, str],
+        output_dir: Union[PathLike[str], str],
         job_id: str,
         filename_to_download: Optional[str] = None,
         enable_partial_downloads: bool = True,
@@ -227,7 +228,7 @@ def download(
 
         Parameters
         ----------
-        output_dir: Path or str
+        output_dir: PathLike or str
             The directory to download the file(s) to.
         job_id : str
             The batch job identifier.
@@ -238,6 +239,7 @@ def download(
             If partially downloaded files will be resumed using range request(s).
 
         """
+        output_dir = validate_path(output_dir, "output_dir")
         self._check_api_key()
 
         params: List[Tuple[str, Optional[str]]] = [
@@ -271,12 +273,12 @@ def download(
                 return
 
         # Prepare job directory
-        job_dir = os.path.join(output_dir, job_id)
+        job_dir = Path(output_dir) / job_id
         os.makedirs(job_dir, exist_ok=True)
 
         for details in job_files:
             filename = str(details["filename"])
-            output_path = os.path.join(job_dir, filename)
+            output_path = job_dir / filename
             log_info(
                 f"Downloading batch job file to {output_path} ...",
             )
@@ -305,7 +307,7 @@ def _download_file(
         self,
         url: str,
         filesize: int,
-        output_path: str,
+        output_path: Path,
         enable_partial_downloads: bool,
     ) -> None:
         headers, mode = self._get_file_download_headers_and_mode(
@@ -329,7 +331,7 @@ def _download_file(
 
     async def download_async(
         self,
-        output_dir: Union[Path, str],
+        output_dir: Union[PathLike[str], str],
         job_id: str,
         filename_to_download: Optional[str] = None,
         enable_partial_downloads: bool = True,
@@ -345,7 +347,7 @@ async def download_async(
 
         Parameters
         ----------
-        output_dir: Path or str
+        output_dir: PathLike or str
             The directory to download the file(s) to.
         job_id : str
             The batch job identifier.
@@ -356,6 +358,7 @@ async def download_async(
             If partially downloaded files will be resumed using range request(s).
 
         """
+        output_dir = validate_path(output_dir, "output_dir")
         self._check_api_key()
 
         params: List[Tuple[str, Optional[str]]] = [
@@ -389,12 +392,12 @@ async def download_async(
                 return
 
         # Prepare job directory
-        job_dir = os.path.join(output_dir, job_id)
+        job_dir = Path(output_dir) / job_id
         os.makedirs(job_dir, exist_ok=True)
 
         for details in job_files:
             filename = str(details["filename"])
-            output_path = os.path.join(job_dir, filename)
+            output_path = job_dir / filename
             log_info(
                 f"Downloading batch job file to {output_path} ...",
             )
@@ -423,7 +426,7 @@ async def _download_file_async(
         self,
         url: str,
         filesize: int,
-        output_path: str,
+        output_path: Path,
         enable_partial_downloads: bool,
     ) -> None:
         headers, mode = self._get_file_download_headers_and_mode(
@@ -449,15 +452,15 @@ async def _download_file_async(
     def _get_file_download_headers_and_mode(
         self,
         filesize: int,
-        output_path: str,
+        output_path: Path,
         enable_partial_downloads: bool,
     ) -> Tuple[Dict[str, str], str]:
         headers: Dict[str, str] = self._headers.copy()
         mode = "wb"
 
         # Check if file already exists in partially downloaded state
-        if enable_partial_downloads and os.path.isfile(output_path):
-            existing_size = os.path.getsize(output_path)
+        if enable_partial_downloads and output_path.is_file():
+            existing_size = output_path.stat().st_size
             if existing_size < filesize:
                 # Make range request for partial download,
                 # will be from next byte to end of file.
diff --git a/databento/historical/api/timeseries.py b/databento/historical/api/timeseries.py
@@ -1,7 +1,7 @@
 import warnings
 from datetime import date
 from io import BufferedIOBase, BytesIO
-from pathlib import Path
+from os import PathLike
 from typing import List, Optional, Tuple, Union
 
 import pandas as pd
@@ -35,7 +35,7 @@ def stream(
         stype_in: Union[SType, str] = "native",
         stype_out: Union[SType, str] = "product_id",
         limit: Optional[int] = None,
-        path: Optional[Union[Path, str]] = None,
+        path: Optional[Union[PathLike[str], str]] = None,
     ) -> Bento:
         """
         The `.stream` method is deprecated and will be removed in a future version.
@@ -63,7 +63,7 @@ def get_range(
         stype_in: Union[SType, str] = "native",
         stype_out: Union[SType, str] = "product_id",
         limit: Optional[int] = None,
-        path: Optional[Union[Path, str]] = None,
+        path: Optional[Union[PathLike[str], str]] = None,
     ) -> Bento:
         """
         Request a historical time series data stream from Databento.
@@ -98,7 +98,7 @@ def get_range(
             The output symbology type to resolve to.
         limit : int, optional
             The maximum number of records to return. If `None` then no limit.
-        path : Path or str, optional
+        path : PathLike or str, optional
             The path to stream the data to on disk (will then return a `Bento`).
 
         Returns
@@ -170,7 +170,7 @@ async def stream_async(
         stype_in: Union[SType, str] = "native",
         stype_out: Union[SType, str] = "product_id",
         limit: Optional[int] = None,
-        path: Optional[Union[Path, str]] = None,
+        path: Optional[Union[PathLike[str], str]] = None,
     ) -> Bento:
         """
         The `.stream_async` method is deprecated and will be removed in a future
@@ -199,10 +199,10 @@ async def get_range_async(
         stype_in: Union[SType, str] = "native",
         stype_out: Union[SType, str] = "product_id",
         limit: Optional[int] = None,
-        path: Optional[Union[Path, str]] = None,
+        path: Optional[Union[PathLike[str], str]] = None,
     ) -> Bento:
         """
-        Request a historical time series data stream from Databento asynchronously.
+        Asynchronously request a historical time series data stream from Databento.
 
         Makes a `GET /timeseries.get_range` HTTP request.
 
@@ -234,7 +234,7 @@ async def get_range_async(
             The output symbology type to resolve to.
         limit : int, optional
             The maximum number of records to return. If `None` then no limit.
-        path : Path or str, optional
+        path : PathLike or str, optional
             The path to stream the data to on disk (will then return a `Bento`).
 
         Returns
diff --git a/tests/test_common_validation.py b/tests/test_common_validation.py
@@ -7,11 +7,26 @@
     validate_enum,
     validate_gateway,
     validate_maybe_enum,
+    validate_path,
     validate_smart_symbol,
 )
 
 
 class TestValidation:
+    @pytest.mark.parametrize(
+        "value",
+        [None, 0],  # one case per entry; neither is a str nor an os.PathLike
+    )
+    def test_validate_path_given_wrong_types_raises_type_error(
+        self,
+        value: Any,
+    ) -> None:
+        # With a single argname each list entry is one case, so the values
+        # are listed flat rather than nested as a single list-valued case.
+        # Arrange, Act, Assert
+        with pytest.raises(TypeError):
+            validate_path(value, "param")
+
     @pytest.mark.parametrize(
         "value, enum",
         [