@@ -46,14 +46,12 @@ def _left_pad_float(seq: List[float], max_len: int, pad: float = 0.0) -> List[fl
4646class MPFClinicalPredictionTask (BaseTask ):
4747 """Binary mortality prediction from FHIR CEHR sequences with optional MPF tokens.
4848
49- Vocabulary warming happens automatically in :meth:`prepare_for_dataset`
50- (called by :meth:`~pyhealth.datasets.BaseDataset.set_task` before the
51- LitData caching pipeline). If the dataset exposes a
52- :class:`~pyhealth.processors.CehrProcessor` via ``dataset.processor``,
53- this task adopts its vocabulary so that a pre-loaded ``vocab_path`` is
54- honoured. After warming, :attr:`frozen_vocab` is set when multiple
55- workers will be spawned so that worker processes look up tokens instead
56- of racing on :class:`~pyhealth.processors.ConceptVocab`.
49+ The task owns a :class:`~pyhealth.processors.CehrProcessor` and its
50+ :class:`~pyhealth.processors.ConceptVocab`. For single-worker use, the
51+ vocabulary grows lazily in :meth:`__call__`. For multi-worker LitData
52+ runs, call :meth:`warm_vocab` before
53+ :meth:`~pyhealth.datasets.BaseDataset.set_task` so the vocabulary is
54+ complete and :attr:`frozen_vocab` prevents races across workers.
5755
5856 Attributes:
5957 max_len: Truncated sequence length (must be >= 2 for boundary tokens).
@@ -121,10 +119,6 @@ def warm_vocab(self, dataset: Any, num_workers: int = 1) -> None:
121119 task.warm_vocab(ds, num_workers=4)
122120 sample_dataset = ds.set_task(task, num_workers=4)
123121
124- If *dataset* has a ``processor`` attribute (i.e. it is a
125- :class:`~pyhealth.datasets.MIMIC4FHIRDataset` with a pre-loaded
126- ``vocab_path``), this task adopts its vocabulary.
127-
128122 Args:
129123 dataset: A :class:`~pyhealth.datasets.BaseDataset` instance whose
130124 ``global_event_df`` has already been built.
0 commit comments