@@ -269,8 +269,6 @@ class Bento:
269269 The raw compressed data in bytes.
270270 reader : IO[bytes]
271271 A zstd decompression stream.
272- record_count : int
273- The record count.
274272 schema : Schema
275273 The data record schema.
276274 start : pd.Timestamp
@@ -347,10 +345,17 @@ def __init__(self, data_source: DataSource) -> None:
347345
348346 def __iter__ (self ) -> Generator [np .void , None , None ]:
349347 reader = self .reader
350- for _ in range ( self . record_count ) :
348+ while True :
351349 raw = reader .read (self .record_size )
352- rec = np .frombuffer (raw , dtype = STRUCT_MAP [self .schema ])
353- yield rec [0 ]
350+ if raw :
351+ rec = np .frombuffer (raw , dtype = STRUCT_MAP [self .schema ])
352+ yield rec [0 ]
353+ else :
354+ break
355+
356+ def __repr__ (self ) -> str :
357+ name = self .__class__ .__name__
358+ return f"<{ name } (schema={ self .schema } )>"
354359
355360 def _apply_pretty_ts (self , df : pd .DataFrame ) -> pd .DataFrame :
356361 df .index = pd .to_datetime (df .index , utc = True )
@@ -412,8 +417,10 @@ def _build_product_id_index(self) -> Dict[dt.date, Dict[int, str]]:
412417 return product_id_index
413418
414419 def _prepare_dataframe (self , df : pd .DataFrame ) -> pd .DataFrame :
420+ # Setup column ordering and index
415421 df .set_index (self ._get_index_column (), inplace = True )
416- df .drop (["length" , "rtype" ], axis = 1 , inplace = True )
422+ df = df .reindex (columns = COLUMNS [self .schema ])
423+
417424 if self .schema == Schema .MBO or self .schema in DERIV_SCHEMAS :
418425 df ["flags" ] = df ["flags" ] & 0xFF # Apply bitmask
419426 df ["side" ] = df ["side" ].str .decode ("utf-8" )
@@ -424,10 +431,6 @@ def _prepare_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
424431 for column , type_max in DEFINITION_TYPE_MAX_MAP .items ():
425432 if column in df .columns :
426433 df [column ] = df [column ].where (df [column ] != type_max , np .nan )
427-
428- # Reorder columns
429- df = df .reindex (columns = COLUMNS [self .schema ])
430-
431434 return df
432435
433436 def _get_index_column (self ) -> str :
@@ -603,18 +606,6 @@ def reader(self) -> IO[bytes]:
603606 reader .seek (self ._metadata_length )
604607 return reader
605608
606- @property
607- def record_count (self ) -> int :
608- """
609- Return the record count.
610-
611- Returns
612- -------
613- int
614-
615- """
616- return self ._metadata ["record_count" ]
617-
618609 @property
619610 def schema (self ) -> Schema :
620611 """
0 commit comments