@@ -114,8 +114,9 @@ class Evaluation:
114114 metadata: Optional structured metadata about the evaluation process.
115115 Can include confidence scores, intermediate calculations, model versions,
116116 or any other relevant technical details.
117- data_type: Optional score data type, required if value is not NUMERIC; one of NUMERIC, CATEGORICAL, or BOOLEAN; default: NUMERIC
118- config_id: Optional Langfuse score config id
117+ data_type: Optional score data type. Required if the value is not numeric.
118+ One of NUMERIC, CATEGORICAL, or BOOLEAN. Defaults to NUMERIC.
119+ config_id: Optional Langfuse score config ID.
119120
120121 Examples:
121122 Basic accuracy evaluation:
@@ -194,12 +195,12 @@ def __init__(
194195 """Initialize an Evaluation with the provided data.
195196
196197 Args:
197- name: Unique identifier for the evaluation metric
198- value: The evaluation score or result
199- comment: Optional human-readable explanation of the result
200- metadata: Optional structured metadata about the evaluation process
201- data_type: Optional score data type (NUMERIC, CATEGORICAL, or BOOLEAN)
202- config_id: Optional Langfuse score config id
198+ name: Unique identifier for the evaluation metric.
199+ value: The evaluation score or result.
200+ comment: Optional human-readable explanation of the result.
201+ metadata: Optional structured metadata about the evaluation process.
202+ data_type: Optional score data type (NUMERIC, CATEGORICAL, or BOOLEAN).
203+ config_id: Optional Langfuse score config ID.
203204
204205 Note:
205206 All arguments must be provided as keywords. Positional arguments will raise a TypeError.
@@ -276,11 +277,11 @@ def __init__(
276277 """Initialize an ExperimentItemResult with the provided data.
277278
278279 Args:
279- item: The original experiment item that was processed
280- output: The actual output produced by the task function for this item
281- evaluations: List of evaluation results for this item
282- trace_id: Optional Langfuse trace ID for this item's execution
283- dataset_run_id: Optional dataset run ID if this item was part of a Langfuse dataset
280+ item: The original experiment item that was processed.
281+ output: The actual output produced by the task function for this item.
282+ evaluations: List of evaluation results for this item.
283+ trace_id: Optional Langfuse trace ID for this item's execution.
284+ dataset_run_id: Optional dataset run ID if this item was part of a Langfuse dataset.
284285
285286 Note:
286287 All arguments must be provided as keywords. Positional arguments will raise a TypeError.
@@ -300,14 +301,15 @@ class ExperimentResult:
300301 about the experiment execution.
301302
302303 Attributes:
303- name: The name of the experiment as specified during execution
304- description: Optional description of the experiment's purpose or methodology
304+ name: The name of the experiment as specified during execution.
305+ run_name: The name of the current experiment run.
306+ description: Optional description of the experiment's purpose or methodology.
305307 item_results: List of results from processing each individual dataset item,
306- containing the original item, task output, evaluations, and trace information
308+ containing the original item, task output, evaluations, and trace information.
307309 run_evaluations: List of aggregate evaluation results computed across all items,
308- such as average scores, statistical summaries, or cross-item analyses
309- dataset_run_id: Optional ID of the dataset run in Langfuse (when using Langfuse datasets)
310- dataset_run_url: Optional direct URL to view the experiment results in Langfuse UI
310+ such as average scores, statistical summaries, or cross-item analyses.
311+ dataset_run_id: Optional ID of the dataset run in Langfuse (when using Langfuse datasets).
312+ dataset_run_url: Optional direct URL to view the experiment results in Langfuse UI.
311313
312314 Examples:
313315 Basic usage with local dataset:
@@ -360,6 +362,7 @@ def __init__(
360362 self ,
361363 * ,
362364 name : str ,
365+ run_name : str ,
363366 description : Optional [str ],
364367 item_results : List [ExperimentItemResult ],
365368 run_evaluations : List [Evaluation ],
@@ -369,14 +372,16 @@ def __init__(
369372 """Initialize an ExperimentResult with the provided data.
370373
371374 Args:
372- name: The name of the experiment
373- description: Optional description of the experiment
374- item_results: List of results from processing individual dataset items
375- run_evaluations: List of aggregate evaluation results for the entire run
376- dataset_run_id: Optional ID of the dataset run (for Langfuse datasets)
377- dataset_run_url: Optional URL to view results in Langfuse UI
375+ name: The name of the experiment.
376+ run_name: The current experiment run name.
377+ description: Optional description of the experiment.
378+ item_results: List of results from processing individual dataset items.
379+ run_evaluations: List of aggregate evaluation results for the entire run.
380+ dataset_run_id: Optional ID of the dataset run (for Langfuse datasets).
381+ dataset_run_url: Optional URL to view results in Langfuse UI.
378382 """
379383 self .name = name
384+ self .run_name = run_name
380385 self .description = description
381386 self .item_results = item_results
382387 self .run_evaluations = run_evaluations
@@ -526,7 +531,8 @@ def format(self, *, include_item_results: bool = False) -> str:
526531
527532 # Experiment overview section
528533 output += f"\\ n{ '─' * 50 } \\ n"
529- output += f"📊 { self .name } "
534+ output += f"🧪 Experiment: { self .name } "
535+ output += f"\n 📋 Run name: { self .run_name } "
530536 if self .description :
531537 output += f" - { self .description } "
532538
0 commit comments