Skip to content

Commit 301de84

Browse files
committed
Simplify logging setup
Why these changes are being introduced:

As a library that is installed by other applications, it is beneficial to have this library's logging follow the levels and handlers set by the calling context. That said, it can also be helpful to set a level like INFO for the calling context, but DEBUG for this library. What we definitely want to avoid is this library setting up a handler and outputting logs, only to have them duplicated in the calling context.

How this addresses that need:
* Remove setting up of handlers; rely on the calling context for handlers

Side effects of this change:
* For local development, logging statements will not show up without some kind of manual logging.basicConfig() call or handler setup.
* For calling contexts (Transmogrifier, pipeline lambdas, and TIM) to show this library's DEBUG logs, they will need to adjust their configure_logger() functions to avoid filtering out loggers from different namespaces.

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/TIMX-427
1 parent 6a2d8c1 commit 301de84

3 files changed

Lines changed: 15 additions & 25 deletions

File tree

tests/test_write.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_dataset_write_record_batches_uses_batch_size(
4545
total_records = 101
4646
batch_size = 50
4747
batches = list(
48-
new_local_dataset.get_dataset_record_batches(
48+
new_local_dataset.create_record_batches(
4949
sample_records_iter(total_records), batch_size=batch_size
5050
)
5151
)

timdex_dataset_api/config.py

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,22 @@
33

44

55
def configure_logger(name: str) -> logging.Logger:
6-
"""Prepares and returns a logger instance for a given module name.
6+
"""Prepares a logger instance.
77
8-
This approach is suitable for an installed and imported library such as this, where
9-
any calling application logging levels and handlers should be utilized.
8+
If the env var TDA_LOG_LEVEL is set, the logging level will override the logging
9+
level of the calling context.
1010
1111
Args:
1212
name (str): The name of the logger, typically __name__ is passed by caller
1313
"""
1414
logger = logging.getLogger(name)
15-
logger.addHandler(logging.NullHandler())
1615

17-
log_level = os.getenv("TDA_LOG_LEVEL", "INFO").strip().upper()
18-
if log_level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
19-
raise ValueError(f"Invalid log level: '{log_level}'")
20-
logger.setLevel(getattr(logging, log_level))
21-
22-
handler = logging.StreamHandler()
23-
handler.setFormatter(
24-
logging.Formatter(
25-
"%(asctime)s %(levelname)s %(name)s.%(funcName)s() "
26-
"line %(lineno)d: %(message)s"
27-
)
28-
)
29-
logger.addHandler(handler)
16+
# set logger level if env var 'TDA_LOG_LEVEL' is set
17+
log_level = os.getenv("TDA_LOG_LEVEL")
18+
if log_level:
19+
log_level = log_level.strip().upper()
20+
if log_level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
21+
raise ValueError(f"Invalid log level: '{log_level}'")
22+
logger.setLevel(getattr(logging, log_level))
3023

3124
return logger

timdex_dataset_api/dataset.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ def write(
316316
"Dataset location must be the root of a single dataset for writing"
317317
)
318318

319-
record_batches_iter = self.get_dataset_record_batches(
319+
record_batches_iter = self.create_record_batches(
320320
records_iter,
321321
batch_size=batch_size,
322322
)
@@ -341,7 +341,7 @@ def write(
341341
self.log_write_statistics(start_time)
342342
return self._written_files # type: ignore[return-value]
343343

344-
def get_dataset_record_batches(
344+
def create_record_batches(
345345
self,
346346
records_iter: Iterator["DatasetRecord"],
347347
*,
@@ -360,14 +360,11 @@ def get_dataset_record_batches(
360360
group size in final parquet files
361361
"""
362362
for i, record_batch in enumerate(itertools.batched(records_iter, batch_size)):
363-
batch_start_time = time.perf_counter()
364363
batch = pa.RecordBatch.from_pylist(
365364
[record.to_dict() for record in record_batch]
366365
)
367-
logger.debug(
368-
f"Batch {i + 1} yielded for writing, "
369-
f"elapsed: {round(time.perf_counter()-batch_start_time, 6)}s"
370-
)
366+
message = f"Yielding batch {i+1} for dataset writing."
367+
logger.debug(message)
371368
yield batch
372369

373370
def log_write_statistics(self, start_time: float) -> None:

0 commit comments

Comments (0)