1919 Union ,
2020)
2121
22- from langfuse .model import DatasetItem
23-
2422if TYPE_CHECKING :
2523 from langfuse ._client .datasets import DatasetItemClient
2624
2725
28- class ExperimentItem (TypedDict , total = False ):
26+ class LocalExperimentItem (TypedDict , total = False ):
2927 """Structure for experiment data items.
3028
3129 Args:
@@ -39,6 +37,10 @@ class ExperimentItem(TypedDict, total=False):
3937 metadata : Optional [Dict [str , Any ]]
4038
4139
40+ ExperimentItem = Union [LocalExperimentItem , DatasetItemClient ]
41+ ExperimentData = Union [List [LocalExperimentItem ], List [DatasetItemClient ]]
42+
43+
4244class Evaluation (TypedDict , total = False ):
4345 """Structure for evaluation results.
4446
@@ -66,7 +68,7 @@ class ExperimentItemResult(TypedDict):
6668 dataset_run_id: Dataset run ID if this item was part of a Langfuse dataset
6769 """
6870
69- item : Union [ ExperimentItem , DatasetItem ]
71+ item : ExperimentItem
7072 output : Any
7173 evaluations : List [Evaluation ]
7274 trace_id : Optional [str ]
@@ -93,7 +95,10 @@ class TaskFunction(Protocol):
9395 """Protocol for experiment task functions."""
9496
9597 def __call__ (
96- self , item : Union [ExperimentItem , dict , DatasetItem , "DatasetItemClient" ]
98+ self ,
99+ * ,
100+ item : ExperimentItem ,
101+ ** kwargs : Dict [str , Any ],
97102 ) -> Union [Any , Awaitable [Any ]]:
98103 """Execute the task on an experiment item.
99104
@@ -116,6 +121,7 @@ def __call__(
116121 output : Any ,
117122 expected_output : Any = None ,
118123 metadata : Optional [Dict [str , Any ]] = None ,
124+ ** kwargs : Dict [str , Any ],
119125 ) -> Union [
120126 Evaluation , List [Evaluation ], Awaitable [Union [Evaluation , List [Evaluation ]]]
121127 ]:
@@ -137,7 +143,10 @@ class RunEvaluatorFunction(Protocol):
137143 """Protocol for run-level evaluator functions."""
138144
139145 def __call__ (
140- self , * , item_results : List [ExperimentItemResult ]
146+ self ,
147+ * ,
148+ item_results : List [ExperimentItemResult ],
149+ ** kwargs : Dict [str , Any ],
141150 ) -> Union [
142151 Evaluation , List [Evaluation ], Awaitable [Union [Evaluation , List [Evaluation ]]]
143152 ]:
@@ -286,7 +295,7 @@ def _format_value(value: Any) -> str:
286295
287296
288297async def _run_evaluator (
289- evaluator : EvaluatorFunction , ** kwargs : Any
298+ evaluator : Union [ EvaluatorFunction , RunEvaluatorFunction ] , ** kwargs : Any
290299) -> List [Evaluation ]:
291300 """Run an evaluator function and normalize the result."""
292301 try :
@@ -299,8 +308,10 @@ async def _run_evaluator(
299308 # Normalize to list
300309 if isinstance (result , dict ):
301310 return [result ]
311+
302312 elif isinstance (result , list ):
303313 return result
314+
304315 else :
305316 return []
306317
@@ -310,12 +321,9 @@ async def _run_evaluator(
310321 return []
311322
312323
313- async def _run_task (
314- task : TaskFunction ,
315- item : Union [ExperimentItem , dict , DatasetItem , "DatasetItemClient" ],
316- ) -> Any :
324+ async def _run_task (task : TaskFunction , item : ExperimentItem ) -> Any :
317325 """Run a task function and handle sync/async."""
318- result = task (item )
326+ result = task (item = item )
319327
320328 # Handle async tasks
321329 if asyncio .iscoroutine (result ):
0 commit comments