Skip to content

Commit fbe5497

Browse files
committed
move to classes
1 parent e4a4599 commit fbe5497

3 files changed

Lines changed: 73 additions & 27 deletions

File tree

langfuse/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
""".. include:: ../README.md"""
22

3+
from langfuse.experiment import Evaluation
4+
35
from ._client import client as _client_module
46
from ._client.attributes import LangfuseOtelSpanAttributes
57
from ._client.constants import ObservationTypeLiteral
@@ -36,6 +38,7 @@
3638
"LangfuseEvaluator",
3739
"LangfuseRetriever",
3840
"LangfuseGuardrail",
41+
"Evaluation",
3942
"experiment",
4043
"api",
4144
]

langfuse/_client/client.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2840,10 +2840,8 @@ async def _process_experiment_item(
28402840
for evaluation in eval_results:
28412841
self.create_score(
28422842
trace_id=trace_id,
2843-
name=evaluation.name or "unknown",
2844-
value=evaluation.value
2845-
if evaluation.value is not None
2846-
else -1, # type: ignore
2843+
name=evaluation.name,
2844+
value=evaluation.value or -1,
28472845
comment=evaluation.comment,
28482846
metadata=evaluation.metadata,
28492847
)

langfuse/experiment.py

Lines changed: 68 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import asyncio
99
import logging
10-
from dataclasses import dataclass
1110
from typing import (
1211
TYPE_CHECKING,
1312
Any,
@@ -94,13 +93,11 @@ class LocalExperimentItem(TypedDict, total=False):
9493
"""
9594

9695

97-
@dataclass(frozen=True)
9896
class Evaluation:
99-
"""Represents an evaluation result for an experiment item.
97+
"""Represents an evaluation result for an experiment item or an entire experiment run.
10098
10199
This class provides a strongly-typed way to create evaluation results in evaluator functions.
102-
Users should import this class and return instances instead of dictionaries for better
103-
type safety and IDE support.
100+
Users must use keyword arguments when instantiating this class.
104101
105102
Attributes:
106103
name: Unique identifier for the evaluation metric. Should be descriptive
@@ -117,7 +114,7 @@ class Evaluation:
117114
metadata: Optional structured metadata about the evaluation process.
118115
Can include confidence scores, intermediate calculations, model versions,
119116
or any other relevant technical details.
120-
data_type: Optional score data type; one of NUMERIC, CATEGORICAL, or BOOLEAN; default: NUMERIC
117+
data_type: Optional score data type; one of NUMERIC, CATEGORICAL, or BOOLEAN. Defaults to NUMERIC and must be set explicitly when value is not numeric.
121118
config_id: Optional Langfuse score config id
122119
123120
Examples:
@@ -180,25 +177,47 @@ def external_api_evaluator(*, input, output, **kwargs):
180177
```
181178
182179
Note:
183-
This class is immutable (frozen=True) to ensure evaluation results cannot be
184-
accidentally modified after creation. All fields except name and value are optional.
180+
All arguments must be passed as keywords. Positional arguments are rejected
181+
so that call sites stay readable and argument reordering cannot introduce errors.
185182
"""
186183

187-
name: str
188-
value: Union[int, float, str, bool, None]
189-
comment: Optional[str] = None
190-
metadata: Optional[Dict[str, Any]] = None
191-
data_type: Optional[ScoreDataType] = None
192-
config_id: Optional[str] = None
184+
def __init__(
185+
self,
186+
*,
187+
name: str,
188+
value: Union[int, float, str, bool, None],
189+
comment: Optional[str] = None,
190+
metadata: Optional[Dict[str, Any]] = None,
191+
data_type: Optional[ScoreDataType] = None,
192+
config_id: Optional[str] = None,
193+
):
194+
"""Initialize an Evaluation with the provided data.
195+
196+
Args:
197+
name: Unique identifier for the evaluation metric
198+
value: The evaluation score or result
199+
comment: Optional human-readable explanation of the result
200+
metadata: Optional structured metadata about the evaluation process
201+
data_type: Optional score data type (NUMERIC, CATEGORICAL, or BOOLEAN)
202+
config_id: Optional Langfuse score config id
203+
204+
Note:
205+
All arguments must be provided as keywords. Positional arguments will raise a TypeError.
206+
"""
207+
self.name = name
208+
self.value = value
209+
self.comment = comment
210+
self.metadata = metadata
211+
self.data_type = data_type
212+
self.config_id = config_id
193213

194214

195-
@dataclass(frozen=True)
196215
class ExperimentItemResult:
197216
"""Result structure for individual experiment items.
198217
199-
This dataclass represents the complete result of processing a single item
218+
This class represents the complete result of processing a single item
200219
during an experiment run, including the original input, task output,
201-
evaluations, and tracing information.
220+
evaluations, and tracing information. Users must use keyword arguments when instantiating this class.
202221
203222
Attributes:
204223
item: The original experiment item that was processed. Can be either
@@ -239,13 +258,38 @@ class ExperimentItemResult:
239258
input_data = item_result.item.input
240259
expected = item_result.item.expected_output
241260
```
261+
262+
Note:
263+
All arguments must be passed as keywords. Positional arguments are rejected
264+
so that call sites stay readable and argument reordering cannot introduce errors.
242265
"""
243266

244-
item: ExperimentItem
245-
output: Any
246-
evaluations: List[Evaluation]
247-
trace_id: Optional[str]
248-
dataset_run_id: Optional[str]
267+
def __init__(
268+
self,
269+
*,
270+
item: ExperimentItem,
271+
output: Any,
272+
evaluations: List[Evaluation],
273+
trace_id: Optional[str],
274+
dataset_run_id: Optional[str],
275+
):
276+
"""Initialize an ExperimentItemResult with the provided data.
277+
278+
Args:
279+
item: The original experiment item that was processed
280+
output: The actual output produced by the task function for this item
281+
evaluations: List of evaluation results for this item
282+
trace_id: Optional Langfuse trace ID for this item's execution
283+
dataset_run_id: Optional dataset run ID if this item was part of a Langfuse dataset
284+
285+
Note:
286+
All arguments must be provided as keywords. Positional arguments will raise a TypeError.
287+
"""
288+
self.item = item
289+
self.output = output
290+
self.evaluations = evaluations
291+
self.trace_id = trace_id
292+
self.dataset_run_id = dataset_run_id
249293

250294

251295
class ExperimentResult:
@@ -314,6 +358,7 @@ class ExperimentResult:
314358

315359
def __init__(
316360
self,
361+
*,
317362
name: str,
318363
description: Optional[str],
319364
item_results: List[ExperimentItemResult],
@@ -938,7 +983,7 @@ async def _run_evaluator(
938983

939984
# Normalize to list
940985
if isinstance(result, (dict, Evaluation)):
941-
return [result]
986+
return [result] # type: ignore
942987

943988
elif isinstance(result, list):
944989
return result

0 commit comments

Comments
 (0)