Commit 4316cb0

VER: Release 0.5.0
See release notes.
2 parents: 5fe529e + e6977d9

34 files changed, +1206 -927 lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
@@ -1,5 +1,10 @@
 # Changelog
 
+## 0.5.0 - 2022-11-07
+- Fixed dataframe columns for derived data schemas (dropped `channel_id`)
+- Fixed `batch.submit_job` requests for `dbz` encoding
+- Updated `quickstart.ipynb` jupyter notebook
+
 ## 0.4.0 - 2022-09-14
 - Upgraded `dbz-python` to `0.1.5`
 - Added `map_symbols` option for `.to_df()` (experimental)
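
To put the 0.5.0 `batch.submit_job` fix above in context, here is a rough sketch of a `dbz`-encoded batch request. The parameter names and values are assumptions for illustration only, not taken from this commit; consult the 0.5.0 documentation for the exact signature.

```python
import databento as db

client = db.Historical()  # reads DATABENTO_API_KEY from the environment

# Hypothetical parameters for illustration; the point is that requests with
# encoding="dbz" are what this release fixes.
job = client.batch.submit_job(
    dataset="GLBX.MDP3",
    symbols=["ESZ2"],
    schema="trades",
    start="2022-11-01",
    end="2022-11-02",
    encoding="dbz",
)
print(job)
```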

LICENSE

Lines changed: 0 additions & 1 deletion
@@ -172,4 +172,3 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 defend, and hold each Contributor harmless for any liability
 incurred by, or claims asserted against, such Contributor by reason
 of your accepting any such warranty or additional liability.
-

README.md

Lines changed: 3 additions & 3 deletions
@@ -45,8 +45,8 @@ To install the latest stable version of the package from PyPI:
 ## Usage
 The library needs to be configured with an API key from your account.
 [Sign up](https://databento.com/signup) for free and you will automatically
-receive a set of API keys to start with. Each API key is a 28-character
-string that can be found on the API Keys page of your [Databento user portal](https://databento.com/platform/keys).
+receive a set of API keys to start with. Each API key is a 32-character
+string starting with `db-`, that can be found on the API Keys page of your [Databento user portal](https://databento.com/platform/keys).
 
 A simple Databento application looks like this:
 
@@ -75,7 +75,7 @@ array = data.to_ndarray() # to ndarray
 ```
 
 Note that the API key was also passed as a parameter, which is
-[not recommended for production applications](https://docs0.databento.com/knowledge-base/new-users/securing-your-api-keys?historical=python&live=python).
+[not recommended for production applications](https://docs.databento.com/knowledge-base/kb-new-users/kb-new-security-managing-api-keys?historical=python&live=python).
 Instead, you can leave out this parameter to pass your API key via the `DATABENTO_API_KEY` environment variable:
 
 ```python
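
As a brief aside on the corrected guidance, a minimal sketch of the environment-variable approach (assuming the package is installed as `databento`):

```python
import os

import databento as db

# Keep the key out of source code, e.g. export DATABENTO_API_KEY="db-..."
assert os.environ.get("DATABENTO_API_KEY", "").startswith("db-")

# With the environment variable set, no key parameter is required.
client = db.Historical()
```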

databento/__init__.py

Lines changed: 30 additions & 0 deletions
@@ -1,6 +1,22 @@
 from typing import Optional
 
 from databento.common.bento import Bento, FileBento, MemoryBento
+from databento.common.enums import (
+    Compression,
+    Dataset,
+    Delivery,
+    Encoding,
+    FeedMode,
+    Flags,
+    HistoricalGateway,
+    LiveGateway,
+    Packaging,
+    RollRule,
+    Schema,
+    SplitDuration,
+    SType,
+    SymbologyResolution,
+)
 from databento.historical.api import API_VERSION
 from databento.historical.client import Historical
 from databento.historical.error import (
@@ -19,9 +35,23 @@
     "BentoError",
     "BentoHttpError",
     "BentoServerError",
+    "Compression",
+    "Dataset",
+    "Delivery",
+    "Encoding",
+    "FeedMode",
     "FileBento",
+    "Flags",
     "Historical",
+    "HistoricalGateway",
+    "LiveGateway",
     "MemoryBento",
+    "Packaging",
+    "RollRule",
+    "Schema",
+    "SplitDuration",
+    "SType",
+    "SymbologyResolution",
 ]
 
 # Set to either 'DEBUG' or 'INFO', controls console logging
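
With these re-exports, the enums are importable from the package root; a quick sketch using only names visible in this diff:

```python
from databento import Compression, Encoding, Schema, SType

# No need to reach into databento.common.enums for these anymore.
print(Schema.MBO)
print(list(Encoding))
print(list(Compression))
print(list(SType))
```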

databento/common/bento.py

Lines changed: 22 additions & 30 deletions
@@ -1,22 +1,26 @@
 import datetime as dt
 import io
 import os.path
-from typing import Any, BinaryIO, Callable, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Dict, List, Optional
 
 import numpy as np
 import pandas as pd
 import zstandard
-from databento.common.data import DBZ_COLUMNS, DBZ_STRUCT_MAP, DERIV_SCHEMAS
+from databento.common.data import COLUMNS, DERIV_SCHEMAS, STRUCT_MAP
 from databento.common.enums import Compression, Encoding, Schema, SType
 from databento.common.logging import log_debug
 from databento.common.metadata import MetadataDecoder
 from databento.common.symbology import ProductIdMappingInterval
 
 
+if TYPE_CHECKING:
+    from databento.historical.client import Historical
+
+
 class Bento:
     """The abstract base class for all Bento I/O classes."""
 
-    def __init__(self):
+    def __init__(self) -> None:
         self._metadata: Dict[str, Any] = {}
         self._dtype: Optional[np.dtype] = None
         self._product_id_index: Dict[dt.date, Dict[int, str]] = {}
@@ -31,7 +35,7 @@ def __init__(self):
         self._limit: Optional[int] = None
         self._encoding: Optional[Encoding] = None
         self._compression: Optional[Compression] = None
-        self._shape: Optional[Tuple] = None
+        self._record_count: Optional[int] = None
 
     def _check_metadata(self) -> None:
         if not self._metadata:
@@ -155,7 +159,7 @@ def dtype(self) -> np.dtype:
         """
         if self._dtype is None:
             self._check_metadata()
-            self._dtype = np.dtype(DBZ_STRUCT_MAP[self.schema])
+            self._dtype = np.dtype(STRUCT_MAP[self.schema])
 
         return self._dtype
 
@@ -336,24 +340,20 @@ def compression(self) -> Compression:
         return self._compression
 
     @property
-    def shape(self) -> Tuple:
+    def record_count(self) -> int:
         """
-        Return the shape of the data.
+        Return the record count.
 
         Returns
         -------
-        Tuple
-            The data shape.
+        int
 
         """
-        if self._shape is None:
+        if self._record_count is None:
             self._check_metadata()
-            self._shape = (
-                self._metadata["record_count"],
-                len(DBZ_STRUCT_MAP[self.schema]),
-            )
+            self._record_count = self._metadata["record_count"]
 
-        return self._shape
+        return self._record_count
 
     @property
     def mappings(self) -> Dict[str, List[Dict[str, Any]]]:
@@ -404,7 +404,7 @@ def to_ndarray(self) -> np.ndarray:
 
         """
         data: bytes = self.reader(decompress=True).read()
-        return np.frombuffer(data, dtype=DBZ_STRUCT_MAP[self.schema])
+        return np.frombuffer(data, dtype=STRUCT_MAP[self.schema])
 
     def to_df(
         self,
@@ -437,20 +437,12 @@ def to_df(
         df.set_index(self._get_index_column(), inplace=True)
 
         # Cleanup dataframe
-        if self.schema == Schema.MBO:
-            df.drop("channel_id", axis=1, inplace=True)
-            df = df.reindex(columns=DBZ_COLUMNS[self.schema])
+        df.drop(["length", "rtype"], axis=1, inplace=True)
+        if self.schema == Schema.MBO or self.schema in DERIV_SCHEMAS:
+            df = df.reindex(columns=COLUMNS[self.schema])
             df["flags"] = df["flags"] & 0xFF  # Apply bitmask
             df["side"] = df["side"].str.decode("utf-8")
             df["action"] = df["action"].str.decode("utf-8")
-        elif self.schema in DERIV_SCHEMAS:
-            df.drop(["nwords", "type", "depth"], axis=1, inplace=True)
-            df = df.reindex(columns=DBZ_COLUMNS[self.schema])
-            df["flags"] = df["flags"] & 0xFF  # Apply bitmask
-            df["side"] = df["side"].str.decode("utf-8")
-            df["action"] = df["action"].str.decode("utf-8")
-        else:
-            df.drop(["nwords", "type"], axis=1, inplace=True)
 
         if pretty_ts:
             df.index = pd.to_datetime(df.index, utc=True)
@@ -493,7 +485,7 @@ def replay(self, callback: Callable[[Any], None]) -> None:
             The callback to the data handler.
 
         """
-        dtype = DBZ_STRUCT_MAP[self.schema]
+        dtype = STRUCT_MAP[self.schema]
         reader: BinaryIO = self.reader(decompress=True)
         while True:
             raw: bytes = reader.read(self.struct_size)
@@ -590,7 +582,7 @@ def to_json(self, path: str) -> None:
         """
         self.to_df().to_json(path, orient="records", lines=True)
 
-    def request_symbology(self, client) -> Dict[str, Any]:
+    def request_symbology(self, client: "Historical") -> Dict[str, Any]:
         """
         Request symbology resolution based on the metadata properties.
 
@@ -622,7 +614,7 @@ def request_symbology(self, client) -> Dict[str, Any]:
 
     def request_full_definitions(
         self,
-        client,
+        client: "Historical",
        path: Optional[str] = None,
     ) -> "Bento":
         """

databento/common/data.py

Lines changed: 26 additions & 24 deletions
@@ -36,16 +36,16 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
 )
 
 
-DBZ_COMMON_HEADER: List[Tuple[str, Union[type, str]]] = [
-    ("nwords", np.uint8),
-    ("type", np.uint8),
+RECORD_HEADER: List[Tuple[str, Union[type, str]]] = [
+    ("length", np.uint8),
+    ("rtype", np.uint8),
     ("publisher_id", np.uint16),
     ("product_id", np.uint32),
     ("ts_event", np.uint64),
 ]
 
 
-DBZ_MBP_MSG: List[Tuple[str, Union[type, str]]] = [
+MBP_MSG: List[Tuple[str, Union[type, str]]] = [
     ("price", np.int64),
     ("size", np.uint32),
     ("action", "S1"),  # 1 byte chararray
@@ -58,7 +58,7 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
 ]
 
 
-DBZ_OHLCV_MSG: List[Tuple[str, Union[type, str]]] = [
+OHLCV_MSG: List[Tuple[str, Union[type, str]]] = [
     ("open", np.int64),
     ("high", np.int64),
     ("low", np.int64),
@@ -67,8 +67,8 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
 ]
 
 
-DBZ_STRUCT_MAP: Dict[Schema, List[Tuple[str, Union[type, str]]]] = {
-    Schema.MBO: DBZ_COMMON_HEADER
+STRUCT_MAP: Dict[Schema, List[Tuple[str, Union[type, str]]]] = {
+    Schema.MBO: RECORD_HEADER
     + [
         ("order_id", np.uint64),
         ("price", np.int64),
@@ -81,9 +81,9 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
         ("ts_in_delta", np.int32),
         ("sequence", np.uint32),
     ],
-    Schema.MBP_1: DBZ_COMMON_HEADER + DBZ_MBP_MSG + get_deriv_ba_types(0),  # 1
-    Schema.MBP_10: DBZ_COMMON_HEADER
-    + DBZ_MBP_MSG
+    Schema.MBP_1: RECORD_HEADER + MBP_MSG + get_deriv_ba_types(0),  # 1
+    Schema.MBP_10: RECORD_HEADER
+    + MBP_MSG
     + get_deriv_ba_types(0)  # 1
     + get_deriv_ba_types(1)  # 2
     + get_deriv_ba_types(2)  # 3
@@ -94,21 +94,21 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
     + get_deriv_ba_types(7)  # 8
     + get_deriv_ba_types(8)  # 9
     + get_deriv_ba_types(9),  # 10
-    Schema.TBBO: DBZ_COMMON_HEADER + DBZ_MBP_MSG + get_deriv_ba_types(0),
-    Schema.TRADES: DBZ_COMMON_HEADER + DBZ_MBP_MSG,
-    Schema.OHLCV_1S: DBZ_COMMON_HEADER + DBZ_OHLCV_MSG,
-    Schema.OHLCV_1M: DBZ_COMMON_HEADER + DBZ_OHLCV_MSG,
-    Schema.OHLCV_1H: DBZ_COMMON_HEADER + DBZ_OHLCV_MSG,
-    Schema.OHLCV_1D: DBZ_COMMON_HEADER + DBZ_OHLCV_MSG,
-    Schema.STATUS: DBZ_COMMON_HEADER
+    Schema.TBBO: RECORD_HEADER + MBP_MSG + get_deriv_ba_types(0),
+    Schema.TRADES: RECORD_HEADER + MBP_MSG,
+    Schema.OHLCV_1S: RECORD_HEADER + OHLCV_MSG,
+    Schema.OHLCV_1M: RECORD_HEADER + OHLCV_MSG,
+    Schema.OHLCV_1H: RECORD_HEADER + OHLCV_MSG,
+    Schema.OHLCV_1D: RECORD_HEADER + OHLCV_MSG,
+    Schema.STATUS: RECORD_HEADER
     + [
         ("ts_recv", np.uint64),
         ("group", "S1"),  # 1 byte chararray
         ("trading_status", np.uint8),
         ("halt_reason", np.uint8),
         ("trading_event", np.uint8),
     ],
-    Schema.DEFINITION: DBZ_COMMON_HEADER
+    Schema.DEFINITION: RECORD_HEADER
     + [
         ("ts_recv", np.uint64),
         ("min_price_increment", np.int64),
@@ -191,24 +191,26 @@ def get_deriv_ba_fields(level: int) -> List[str]:
 ]
 
 
-DBZ_DERIV_HEADER_FIELDS = [
+DERIV_HEADER_FIELDS = [
     "ts_event",
     "ts_in_delta",
     "publisher_id",
     "product_id",
     "action",
     "side",
+    "depth",
     "flags",
     "price",
     "size",
     "sequence",
 ]
 
-DBZ_COLUMNS = {
+COLUMNS = {
     Schema.MBO: [
         "ts_event",
         "ts_in_delta",
         "publisher_id",
+        "channel_id",
        "product_id",
         "order_id",
         "action",
@@ -218,8 +220,8 @@ def get_deriv_ba_fields(level: int) -> List[str]:
         "size",
         "sequence",
     ],
-    Schema.MBP_1: DBZ_DERIV_HEADER_FIELDS + get_deriv_ba_fields(0),
-    Schema.MBP_10: DBZ_DERIV_HEADER_FIELDS
+    Schema.MBP_1: DERIV_HEADER_FIELDS + get_deriv_ba_fields(0),
+    Schema.MBP_10: DERIV_HEADER_FIELDS
     + get_deriv_ba_fields(0)
     + get_deriv_ba_fields(1)
     + get_deriv_ba_fields(2)
@@ -230,6 +232,6 @@ def get_deriv_ba_fields(level: int) -> List[str]:
     + get_deriv_ba_fields(7)
     + get_deriv_ba_fields(8)
     + get_deriv_ba_fields(9),
-    Schema.TBBO: DBZ_DERIV_HEADER_FIELDS + get_deriv_ba_fields(0),
-    Schema.TRADES: DBZ_DERIV_HEADER_FIELDS,
+    Schema.TBBO: DERIV_HEADER_FIELDS + get_deriv_ba_fields(0),
+    Schema.TRADES: DERIV_HEADER_FIELDS,
 }
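
Since the renamed `STRUCT_MAP` still drives `to_ndarray` and `replay`, the `length`/`rtype` header rename can be checked directly from the struct definitions. A small sketch using only internal names shown in this diff (these modules are internal rather than public API):

```python
import numpy as np

from databento.common.data import STRUCT_MAP
from databento.common.enums import Schema

# Build the structured dtype for the trades schema and inspect the record header.
dtype = np.dtype(STRUCT_MAP[Schema.TRADES])
print(dtype.names[:5])  # ('length', 'rtype', 'publisher_id', 'product_id', 'ts_event')
print(dtype.itemsize)   # fixed record size in bytes for this schema
```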

databento/common/enums.py

Lines changed: 3 additions & 5 deletions
@@ -5,7 +5,6 @@
 class HistoricalGateway(Enum):
     """Represents a historical data center gateway location."""
 
-    NEAREST = "nearest"
     BO1 = "bo1"
 
 
@@ -14,7 +13,6 @@ class LiveGateway(Enum):
     """Represents a live data center gateway location."""
 
     ORIGIN = "origin"
-    NEAREST = "nearest"
     NY4 = "ny4"
     DC3 = "dc3"
 
@@ -72,8 +70,8 @@ class Compression(Enum):
 
 
 @unique
-class Duration(Enum):
-    """Represents the duration interval for each batch data file."""
+class SplitDuration(Enum):
+    """Represents the duration before splitting for each batched data file."""
 
     DAY = "day"
     WEEK = "week"
@@ -92,7 +90,7 @@ class Packaging(Enum):
 
 @unique
 class Delivery(Enum):
-    """Represents the delivery mechanism for batch data."""
+    """Represents the delivery mechanism for batched data."""
 
     DOWNLOAD = "download"
     S3 = "s3"
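
For callers, the `Duration` to `SplitDuration` rename is a name change only; the string values shown in this diff are unchanged, as in this small sketch:

```python
from databento import Delivery, SplitDuration

# Previously Duration.DAY; the underlying string values are the same.
assert SplitDuration.DAY.value == "day"
assert Delivery.DOWNLOAD.value == "download"
```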
