Skip to content

Commit 52f7d80

Browse files
committed
push
1 parent f94dab3 commit 52f7d80

3 files changed

Lines changed: 58 additions & 17 deletions

File tree

langfuse/_client/client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2726,6 +2726,8 @@ async def process_item(item: ExperimentItem) -> dict:
27262726
self.flush()
27272727

27282728
return {
2729+
"name": name,
2730+
"description": description,
27292731
"item_results": valid_results,
27302732
"run_evaluations": run_evaluations,
27312733
"dataset_run_id": dataset_run_id,

langfuse/_client/experiments.py

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ class ExperimentResult(TypedDict):
189189
dataset_run_url: URL to view the dataset run in Langfuse UI
190190
"""
191191

192+
name: str
193+
description: Optional[str]
192194
item_results: List[ExperimentItemResult]
193195
run_evaluations: List[Evaluation]
194196
dataset_run_id: Optional[str]
@@ -578,27 +580,62 @@ def performance_distribution(*, item_results, **kwargs):
578580
...
579581

580582

581-
def format_experiment_results(
582-
item_results: List[ExperimentItemResult],
583-
run_evaluations: List[Evaluation],
584-
experiment_name: str,
585-
experiment_description: Optional[str] = None,
586-
dataset_run_url: Optional[str] = None,
583+
def format_experiment_result(
584+
experiment_result: ExperimentResult,
585+
*,
587586
include_item_results: bool = False,
588587
) -> str:
589-
"""Format experiment results for display.
588+
"""Format an experiment result for human-readable display.
589+
590+
Takes an ExperimentResult object and converts it into a nicely formatted
591+
string suitable for console output or logging. The output includes experiment
592+
overview, aggregate statistics, and optionally individual item details.
590593
591594
Args:
592-
item_results: Results from processing each item
593-
run_evaluations: Results from run-level evaluators
594-
experiment_name: Name of the experiment
595-
experiment_description: Optional description of the experiment
596-
dataset_run_url: Optional URL to dataset run in Langfuse UI
597-
include_item_results: Whether to include individual item details
595+
experiment_result: Complete experiment result containing name, description,
596+
item results, run evaluations, and dataset run information.
597+
include_item_results: Whether to include detailed results for each individual
598+
item in the output. When False (default), only shows aggregate statistics.
599+
Set to True to see input/output/scores for every processed item.
598600
599601
Returns:
600-
Formatted string representation of the results
602+
A formatted multi-line string, or "No experiment results to display." if there are no item results; otherwise containing:
603+
- Experiment name and description
604+
- Number of items processed
605+
- List of evaluation metrics used
606+
- Average scores across all items
607+
- Run-level evaluation results
608+
- Dataset run URL (if available)
609+
- Individual item details (if include_item_results=True)
610+
611+
Examples:
612+
Basic usage with aggregate results only:
613+
```python
614+
result = langfuse.run_experiment(...)
615+
print(format_experiment_result(result))
616+
```
617+
618+
Detailed output including individual items:
619+
```python
620+
result = langfuse.run_experiment(...)
621+
detailed_report = format_experiment_result(
622+
result,
623+
include_item_results=True
624+
)
625+
print(detailed_report)
626+
```
627+
628+
Save formatted results to file:
629+
```python
630+
result = dataset.run_experiment(...)
631+
with open("experiment_report.txt", "w") as f:
632+
f.write(format_experiment_result(result, include_item_results=True))
633+
```
601634
"""
635+
item_results = experiment_result["item_results"]
636+
run_evaluations = experiment_result["run_evaluations"]
637+
dataset_run_url = experiment_result["dataset_run_url"]
638+
602639
if not item_results:
603640
return "No experiment results to display."
604641

@@ -651,9 +688,9 @@ def format_experiment_results(
651688

652689
# Experiment Overview
653690
output += f"\n{'─' * 50}\n"
654-
output += f"📊 {experiment_name}"
655-
if experiment_description:
656-
output += f" - {experiment_description}"
691+
output += f"📊 {experiment_result['name']}"
692+
if experiment_result["description"]:
693+
output += f" - {experiment_result['description']}"
657694

658695
output += f"\n{len(item_results)} items"
659696

langfuse/experiment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
RunEvaluatorFunction,
1010
TaskFunction,
1111
create_evaluator_from_autoevals,
12+
format_experiment_result,
1213
)
1314

1415
__all__ = [
@@ -22,4 +23,5 @@
2223
"EvaluatorFunction",
2324
"RunEvaluatorFunction",
2425
"create_evaluator_from_autoevals",
26+
"format_experiment_result",
2527
]

0 commit comments

Comments
 (0)