Skip to content

Commit da756c5

Browse files
authored
Merge branch 'main' into add-batch-evals
2 parents 06a6e37 + 94b0211 commit da756c5

3 files changed

Lines changed: 82 additions & 81 deletions

File tree

langfuse/_client/client.py

Lines changed: 80 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -2886,95 +2886,96 @@ async def _process_experiment_item(
28862886
)
28872887
raise e
28882888

2889-
# Run evaluators
2890-
evaluations = []
2889+
# Run evaluators
2890+
evaluations = []
28912891

2892-
for evaluator in evaluators:
2893-
try:
2894-
eval_metadata: Optional[Dict[str, Any]] = None
2892+
for evaluator in evaluators:
2893+
try:
2894+
eval_metadata: Optional[Dict[str, Any]] = None
28952895

2896-
if isinstance(item, dict):
2897-
eval_metadata = item.get("metadata")
2898-
elif hasattr(item, "metadata"):
2899-
eval_metadata = item.metadata
2896+
if isinstance(item, dict):
2897+
eval_metadata = item.get("metadata")
2898+
elif hasattr(item, "metadata"):
2899+
eval_metadata = item.metadata
29002900

2901-
eval_results = await _run_evaluator(
2902-
evaluator,
2903-
input=input_data,
2904-
output=output,
2905-
expected_output=expected_output,
2906-
metadata=eval_metadata,
2901+
eval_results = await _run_evaluator(
2902+
evaluator,
2903+
input=input_data,
2904+
output=output,
2905+
expected_output=expected_output,
2906+
metadata=eval_metadata,
2907+
)
2908+
evaluations.extend(eval_results)
2909+
2910+
# Store evaluations as scores
2911+
for evaluation in eval_results:
2912+
self.create_score(
2913+
trace_id=trace_id,
2914+
observation_id=span.id,
2915+
name=evaluation.name,
2916+
value=evaluation.value, # type: ignore
2917+
comment=evaluation.comment,
2918+
metadata=evaluation.metadata,
2919+
config_id=evaluation.config_id,
2920+
data_type=evaluation.data_type, # type: ignore
29072921
)
2908-
evaluations.extend(eval_results)
2909-
2910-
# Store evaluations as scores
2911-
for evaluation in eval_results:
2912-
self.create_score(
2913-
trace_id=trace_id,
2914-
observation_id=span.id,
2915-
name=evaluation.name,
2916-
value=evaluation.value, # type: ignore
2917-
comment=evaluation.comment,
2918-
metadata=evaluation.metadata,
2919-
config_id=evaluation.config_id,
2920-
data_type=evaluation.data_type, # type: ignore
2921-
)
29222922

2923-
except Exception as e:
2924-
langfuse_logger.error(f"Evaluator failed: {e}")
2923+
except Exception as e:
2924+
langfuse_logger.error(f"Evaluator failed: {e}")
29252925

2926-
# Run composite evaluator if provided and we have evaluations
2927-
if composite_evaluator and evaluations:
2928-
try:
2929-
composite_eval_metadata: Optional[Dict[str, Any]] = None
2930-
if isinstance(item, dict):
2931-
composite_eval_metadata = item.get("metadata")
2932-
elif hasattr(item, "metadata"):
2933-
composite_eval_metadata = item.metadata
29342926

2935-
result = composite_evaluator(
2936-
input=input_data,
2937-
output=output,
2938-
expected_output=expected_output,
2939-
metadata=composite_eval_metadata,
2940-
evaluations=evaluations,
2941-
)
2927+
# Run composite evaluator if provided and we have evaluations
2928+
if composite_evaluator and evaluations:
2929+
try:
2930+
composite_eval_metadata: Optional[Dict[str, Any]] = None
2931+
if isinstance(item, dict):
2932+
composite_eval_metadata = item.get("metadata")
2933+
elif hasattr(item, "metadata"):
2934+
composite_eval_metadata = item.metadata
29422935

2943-
# Handle async composite evaluators
2944-
if asyncio.iscoroutine(result):
2945-
result = await result
2946-
2947-
# Normalize to list
2948-
composite_evals: List[Evaluation] = []
2949-
if isinstance(result, (dict, Evaluation)):
2950-
composite_evals = [result] # type: ignore
2951-
elif isinstance(result, list):
2952-
composite_evals = result # type: ignore
2953-
2954-
# Store composite evaluations as scores and add to evaluations list
2955-
for composite_evaluation in composite_evals:
2956-
self.create_score(
2957-
trace_id=trace_id,
2958-
observation_id=span.id,
2959-
name=composite_evaluation.name,
2960-
value=composite_evaluation.value, # type: ignore
2961-
comment=composite_evaluation.comment,
2962-
metadata=composite_evaluation.metadata,
2963-
config_id=composite_evaluation.config_id,
2964-
data_type=composite_evaluation.data_type, # type: ignore
2965-
)
2966-
evaluations.append(composite_evaluation)
2936+
result = composite_evaluator(
2937+
input=input_data,
2938+
output=output,
2939+
expected_output=expected_output,
2940+
metadata=composite_eval_metadata,
2941+
evaluations=evaluations,
2942+
)
29672943

2968-
except Exception as e:
2969-
langfuse_logger.error(f"Composite evaluator failed: {e}")
2944+
# Handle async composite evaluators
2945+
if asyncio.iscoroutine(result):
2946+
result = await result
29702947

2971-
return ExperimentItemResult(
2972-
item=item,
2973-
output=output,
2974-
evaluations=evaluations,
2975-
trace_id=trace_id,
2976-
dataset_run_id=dataset_run_id,
2977-
)
2948+
# Normalize to list
2949+
composite_evals: List[Evaluation] = []
2950+
if isinstance(result, (dict, Evaluation)):
2951+
composite_evals = [result] # type: ignore
2952+
elif isinstance(result, list):
2953+
composite_evals = result # type: ignore
2954+
2955+
# Store composite evaluations as scores and add to evaluations list
2956+
for composite_evaluation in composite_evals:
2957+
self.create_score(
2958+
trace_id=trace_id,
2959+
observation_id=span.id,
2960+
name=composite_evaluation.name,
2961+
value=composite_evaluation.value, # type: ignore
2962+
comment=composite_evaluation.comment,
2963+
metadata=composite_evaluation.metadata,
2964+
config_id=composite_evaluation.config_id,
2965+
data_type=composite_evaluation.data_type, # type: ignore
2966+
)
2967+
evaluations.append(composite_evaluation)
2968+
2969+
except Exception as e:
2970+
langfuse_logger.error(f"Composite evaluator failed: {e}")
2971+
2972+
return ExperimentItemResult(
2973+
item=item,
2974+
output=output,
2975+
evaluations=evaluations,
2976+
trace_id=trace_id,
2977+
dataset_run_id=dataset_run_id,
2978+
)
29782979

29792980
def _create_experiment_run_name(
29802981
self, *, name: Optional[str] = None, run_name: Optional[str] = None

langfuse/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""@private"""
22

3-
__version__ = "3.9.2"
3+
__version__ = "3.9.3"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[tool.poetry]
22
name = "langfuse"
33

4-
version = "3.9.2"
4+
version = "3.9.3"
55
description = "A client library for accessing langfuse"
66
authors = ["langfuse <developers@langfuse.com>"]
77
license = "MIT"

0 commit comments

Comments
 (0)