@@ -189,6 +189,8 @@ class ExperimentResult(TypedDict):
189189 dataset_run_url: URL to view the dataset run in Langfuse UI
190190 """
191191
192+ name : str
193+ description : Optional [str ]
192194 item_results : List [ExperimentItemResult ]
193195 run_evaluations : List [Evaluation ]
194196 dataset_run_id : Optional [str ]
@@ -578,27 +580,62 @@ def performance_distribution(*, item_results, **kwargs):
578580 ...
579581
580582
581- def format_experiment_results (
582- item_results : List [ExperimentItemResult ],
583- run_evaluations : List [Evaluation ],
584- experiment_name : str ,
585- experiment_description : Optional [str ] = None ,
586- dataset_run_url : Optional [str ] = None ,
583+ def format_experiment_result (
584+ experiment_result : ExperimentResult ,
585+ * ,
587586 include_item_results : bool = False ,
588587) -> str :
589- """Format experiment results for display.
588+ """Format an experiment result for human-readable display.
589+
590+ Takes an ExperimentResult object and converts it into a nicely formatted
591+ string suitable for console output or logging. The output includes experiment
592+ overview, aggregate statistics, and optionally individual item details.
590593
591594 Args:
592- item_results: Results from processing each item
593- run_evaluations: Results from run-level evaluators
594- experiment_name: Name of the experiment
595- experiment_description: Optional description of the experiment
596- dataset_run_url: Optional URL to dataset run in Langfuse UI
597- include_item_results: Whether to include individual item details
595+ experiment_result: Complete experiment result containing name, description,
596+ item results, run evaluations, and dataset run information.
597+ include_item_results: Whether to include detailed results for each individual
598+ item in the output. When False (default), only shows aggregate statistics.
599+ Set to True to see input/output/scores for every processed item.
598600
599601 Returns:
600- Formatted string representation of the results
602+ A formatted multi-line string containing:
603+ - Experiment name and description
604+ - Number of items processed
605+ - List of evaluation metrics used
606+ - Average scores across all items
607+ - Run-level evaluation results
608+ - Dataset run URL (if available)
609+ - Individual item details (if include_item_results=True)
610+
611+ Examples:
612+ Basic usage with aggregate results only:
613+ ```python
614+ result = langfuse.run_experiment(...)
615+ print(format_experiment_result(result))
616+ ```
617+
618+ Detailed output including individual items:
619+ ```python
620+ result = langfuse.run_experiment(...)
621+ detailed_report = format_experiment_result(
622+ result,
623+ include_item_results=True
624+ )
625+ print(detailed_report)
626+ ```
627+
628+ Save formatted results to file:
629+ ```python
630+ result = dataset.run_experiment(...)
631+ with open("experiment_report.txt", "w") as f:
632+ f.write(format_experiment_result(result, include_item_results=True))
633+ ```
601634 """
635+ item_results = experiment_result ["item_results" ]
636+ run_evaluations = experiment_result ["run_evaluations" ]
637+ dataset_run_url = experiment_result ["dataset_run_url" ]
638+
602639 if not item_results :
603640 return "No experiment results to display."
604641
@@ -651,9 +688,9 @@ def format_experiment_results(
651688
652689 # Experiment Overview
653690 output += f"\n { '─' * 50 } \n "
654- output += f"📊 { experiment_name } "
655- if experiment_description :
656- output += f" - { experiment_description } "
691+ output += f"📊 { experiment_result [ 'name' ] } "
692+ if experiment_result [ "description" ] :
693+ output += f" - { experiment_result [ 'description' ] } "
657694
658695 output += f"\n { len (item_results )} items"
659696
0 commit comments