Skip to content

Commit 3d68b44

Browse files
committed
ADD: Add Python client support for statistics
1 parent 743a425 commit 3d68b44

8 files changed

Lines changed: 50 additions & 21 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
# Changelog
22

33
## 0.13.0 - TBD
4+
- Added support for `statistics` schema
5+
- Upgraded `databento-dbn` to 0.5.1
46
- Renamed `booklevel` MBP field to `levels` for brevity and consistent naming
57
- Changed `flags` field to an unsigned int
68
- Changed default of `ts_out` to `False` for `Live` client
7-
- Removed `open_interest_qty` and `cleared_volume` fields that were always unset from
8-
definition schema
9+
- Removed `open_interest_qty` and `cleared_volume` fields that were always unset from definition schema
910

1011
## 0.12.0 - 2023-05-01
1112
- Added `Live` client for connecting to Databento's live service

databento/common/data.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,20 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
170170
("dummy", "S1"),
171171
]
172172

173+
STATISTICS_MSG: List[Tuple[str, Union[type, str]]] = RECORD_HEADER + [
174+
("ts_recv", np.uint64),
175+
("ts_ref", np.uint64),
176+
("price", np.int64),
177+
("quantity", np.int32),
178+
("sequence", np.uint32),
179+
("ts_in_delta", np.int32),
180+
("stat_type", np.uint16),
181+
("channel_id", np.uint16),
182+
("update_action", np.uint8),
183+
("stat_flags", np.uint8),
184+
("dummy", "S6"),
185+
]
186+
173187

174188
STRUCT_MAP: Dict[Schema, List[Tuple[str, Union[type, str]]]] = {
175189
Schema.MBO: MBO_MSG,
@@ -193,6 +207,7 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
193207
Schema.OHLCV_1D: OHLCV_MSG,
194208
Schema.DEFINITION: DEFINITION_MSG,
195209
Schema.IMBALANCE: IMBALANCE_MSG,
210+
Schema.STATISTICS: STATISTICS_MSG,
196211
}
197212

198213

@@ -288,6 +303,13 @@ def get_deriv_ba_fields(level: int) -> List[str]:
288303
"dummy",
289304
]
290305

306+
STATISTICS_DROP_COLUMNS = [
307+
"ts_recv",
308+
"length",
309+
"rtype",
310+
"dummy",
311+
]
312+
291313
DEFINITION_COLUMNS = [
292314
x
293315
for x in (np.dtype(DEFINITION_MSG).names or ())
@@ -298,6 +320,12 @@ def get_deriv_ba_fields(level: int) -> List[str]:
298320
x for x in (np.dtype(IMBALANCE_MSG).names or ()) if x not in IMBALANCE_DROP_COLUMNS
299321
]
300322

323+
STATISTICS_COLUMNS = [
324+
x
325+
for x in (np.dtype(STATISTICS_MSG).names or ())
326+
if x not in STATISTICS_DROP_COLUMNS
327+
]
328+
301329
COLUMNS = {
302330
Schema.MBO: [
303331
"ts_event",
@@ -333,4 +361,5 @@ def get_deriv_ba_fields(level: int) -> List[str]:
333361
Schema.OHLCV_1D: OHLCV_HEADER_COLUMNS,
334362
Schema.DEFINITION: DEFINITION_COLUMNS,
335363
Schema.IMBALANCE: IMBALANCE_COLUMNS,
364+
Schema.STATISTICS: STATISTICS_COLUMNS,
336365
}

databento/common/dbnstore.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -383,11 +383,19 @@ def _apply_pretty_ts(self, df: pd.DataFrame) -> pd.DataFrame:
383383
df.index = pd.to_datetime(df.index, utc=True)
384384
for column in df.columns:
385385
if column.startswith("ts_") and "delta" not in column:
386-
df[column] = pd.to_datetime(df[column], utc=True)
386+
df[column] = pd.to_datetime(df[column], errors="coerce", utc=True)
387387

388388
if self.schema == Schema.DEFINITION:
389-
df["expiration"] = pd.to_datetime(df["expiration"], utc=True)
390-
df["activation"] = pd.to_datetime(df["activation"], utc=True)
389+
df["expiration"] = pd.to_datetime(
390+
df["expiration"],
391+
errors="coerce",
392+
utc=True,
393+
)
394+
df["activation"] = pd.to_datetime(
395+
df["activation"],
396+
errors="coerce",
397+
utc=True,
398+
)
391399

392400
return df
393401

databento/common/enums.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
MBP1Msg,
1111
MBP10Msg,
1212
OHLCVMsg,
13+
StatMsg,
1314
TradeMsg,
1415
)
1516

@@ -192,6 +193,7 @@ class Schema(StringyMixin, str, Enum):
192193
OHLCV_1D = "ohlcv-1d"
193194
DEFINITION = "definition"
194195
IMBALANCE = "imbalance"
196+
STATISTICS = "statistics"
195197

196198
def get_record_type(self) -> Type[DBNRecord]:
197199
if self == Schema.MBO:
@@ -216,6 +218,8 @@ def get_record_type(self) -> Type[DBNRecord]:
216218
return InstrumentDefMsg
217219
if self == Schema.IMBALANCE:
218220
return ImbalanceMsg
221+
if self == Schema.STATISTICS:
222+
return StatMsg
219223
raise NotImplementedError(f"No message type for {self}")
220224

221225

databento/live/dbn.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
databento_dbn.OHLCVMsg,
2323
databento_dbn.ImbalanceMsg,
2424
databento_dbn.InstrumentDefMsg,
25+
databento_dbn.StatMsg,
2526
databento_dbn.SymbolMappingMsg,
2627
databento_dbn.SystemMsg,
2728
databento_dbn.ErrorMsg,

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
aiohttp>=3.7.2,<4.0.0
2-
databento-dbn==0.5.0
2+
databento-dbn==0.5.1
33
numpy>=1.17.0
44
pandas>=1.1.3
55
requests>=2.24.0
129 Bytes
Binary file not shown.

tests/test_historical_bento.py

Lines changed: 1 addition & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -256,21 +256,7 @@ def test_to_df_across_schemas_returns_identical_dimension_dfs(
256256

257257
@pytest.mark.parametrize(
258258
"schema",
259-
[
260-
pytest.param(schema, id=str(schema))
261-
for schema in (
262-
Schema.MBO,
263-
Schema.MBP_1,
264-
Schema.MBP_10,
265-
Schema.TBBO,
266-
Schema.TRADES,
267-
Schema.OHLCV_1S,
268-
Schema.OHLCV_1M,
269-
Schema.OHLCV_1H,
270-
Schema.OHLCV_1D,
271-
Schema.DEFINITION,
272-
)
273-
],
259+
[pytest.param(schema, id=str(schema)) for schema in Schema],
274260
)
275261
def test_to_df_drop_columns(
276262
test_data: Callable[[Schema], bytes],

0 commit comments

Comments
 (0)