Skip to content

Commit 32cbe02

Browse files
committed
add run_name
1 parent 13c42d9 commit 32cbe02

5 files changed

Lines changed: 99 additions & 67 deletions

File tree

langfuse/_client/client.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2463,6 +2463,7 @@ def run_experiment(
24632463
self,
24642464
*,
24652465
name: str,
2466+
run_name: Optional[str] = None,
24662467
description: Optional[str] = None,
24672468
data: ExperimentData,
24682469
task: TaskFunction,
@@ -2487,7 +2488,10 @@ def run_experiment(
24872488
24882489
Args:
24892490
name: Human-readable name for the experiment. Used for identification
2490-
in the Langfuse UI and for dataset run naming if using Langfuse datasets.
2491+
in the Langfuse UI.
2492+
run_name: Optional exact name for the experiment run. If provided, this will be
2493+
used as the exact dataset run name if the `data` contains Langfuse dataset items.
2494+
If not provided, this will default to the experiment name appended with an ISO timestamp.
24912495
description: Optional description explaining the experiment's purpose,
24922496
methodology, or expected outcomes.
24932497
data: Array of data items to process. Can be either:
@@ -2628,6 +2632,9 @@ def average_accuracy(*, item_results, **kwargs):
26282632
run_async_safely(
26292633
self._run_experiment_async(
26302634
name=name,
2635+
run_name=self._create_experiment_run_name(
2636+
name=name, run_name=run_name
2637+
),
26312638
description=description,
26322639
data=data,
26332640
task=task,
@@ -2643,6 +2650,7 @@ async def _run_experiment_async(
26432650
self,
26442651
*,
26452652
name: str,
2653+
run_name: str,
26462654
description: Optional[str],
26472655
data: ExperimentData,
26482656
task: TaskFunction,
@@ -2651,7 +2659,9 @@ async def _run_experiment_async(
26512659
max_concurrency: int,
26522660
metadata: Dict[str, Any],
26532661
) -> ExperimentResult:
2654-
langfuse_logger.debug(f"Starting experiment '{name}' with {len(data)} items")
2662+
langfuse_logger.debug(
2663+
f"Starting experiment '{name}' run '{run_name}' with {len(data)} items"
2664+
)
26552665

26562666
# Set up concurrency control
26572667
semaphore = asyncio.Semaphore(max_concurrency)
@@ -2660,7 +2670,7 @@ async def _run_experiment_async(
26602670
async def process_item(item: ExperimentItem) -> ExperimentItemResult:
26612671
async with semaphore:
26622672
return await self._process_experiment_item(
2663-
item, task, evaluators, name, description, metadata
2673+
item, task, evaluators, name, run_name, description, metadata
26642674
)
26652675

26662676
# Run all items concurrently
@@ -2728,6 +2738,7 @@ async def process_item(item: ExperimentItem) -> ExperimentItemResult:
27282738

27292739
return ExperimentResult(
27302740
name=name,
2741+
run_name=run_name,
27312742
description=description,
27322743
item_results=valid_results,
27332744
run_evaluations=run_evaluations,
@@ -2741,6 +2752,7 @@ async def _process_experiment_item(
27412752
task: Callable,
27422753
evaluators: List[Callable],
27432754
experiment_name: str,
2755+
experiment_run_name: str,
27442756
experiment_description: Optional[str],
27452757
experiment_metadata: Dict[str, Any],
27462758
) -> ExperimentItemResult:
@@ -2764,6 +2776,7 @@ async def _process_experiment_item(
27642776

27652777
final_metadata = {
27662778
"experiment_name": experiment_name,
2779+
"experiment_run_name": experiment_run_name,
27672780
**experiment_metadata,
27682781
}
27692782

@@ -2796,7 +2809,7 @@ async def _process_experiment_item(
27962809

27972810
dataset_run_item = self.api.dataset_run_items.create(
27982811
request=CreateDatasetRunItemRequest(
2799-
runName=experiment_name,
2812+
runName=experiment_run_name,
28002813
runDescription=experiment_description,
28012814
metadata=experiment_metadata,
28022815
datasetItemId=item.id, # type: ignore
@@ -2864,6 +2877,16 @@ async def _process_experiment_item(
28642877
)
28652878
raise e
28662879

2880+
def _create_experiment_run_name(
2881+
self, *, name: Optional[str] = None, run_name: Optional[str] = None
2882+
) -> str:
2883+
if run_name:
2884+
return run_name
2885+
2886+
iso_timestamp = _get_timestamp().isoformat().replace("+00:00", "Z")
2887+
2888+
return f"{name} - {iso_timestamp}"
2889+
28672890
def auth_check(self) -> bool:
28682891
"""Check if the provided credentials (public and secret key) are valid.
28692892

langfuse/_client/datasets.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from langfuse.experiment import (
88
EvaluatorFunction,
9+
ExperimentResult,
910
RunEvaluatorFunction,
1011
TaskFunction,
1112
)
@@ -199,13 +200,14 @@ def run_experiment(
199200
self,
200201
*,
201202
name: str,
203+
run_name: Optional[str] = None,
202204
description: Optional[str] = None,
203205
task: TaskFunction,
204206
evaluators: List[EvaluatorFunction] = [],
205207
run_evaluators: List[RunEvaluatorFunction] = [],
206208
max_concurrency: int = 50,
207209
metadata: Optional[Dict[str, Any]] = None,
208-
) -> Any:
210+
) -> ExperimentResult:
209211
"""Run an experiment on this Langfuse dataset with automatic tracking.
210212
211213
This is a convenience method that runs an experiment using all items in this
@@ -222,6 +224,9 @@ def run_experiment(
222224
Args:
223225
name: Human-readable name for the experiment. Used for identification in the
224226
Langfuse UI; the dataset run name is taken from `run_name` or derived from this name.
227+
run_name: Optional exact name for the dataset run. If provided, this will be
228+
used as the exact dataset run name in Langfuse. If not provided, this will
229+
default to the experiment name appended with an ISO timestamp.
225230
description: Optional description of the experiment's purpose, methodology,
226231
or what you're testing. Appears in the Langfuse UI for context.
227232
task: Function that processes each dataset item and returns output.
@@ -238,12 +243,13 @@ def run_experiment(
238243
239244
Returns:
240245
ExperimentResult object containing:
241-
- name: The experiment name
242-
- description: Optional experiment description
243-
- item_results: Results for each dataset item with outputs and evaluations
244-
- run_evaluations: Aggregate evaluation results for the entire run
245-
- dataset_run_id: ID of the created dataset run in Langfuse
246-
- dataset_run_url: Direct URL to view the experiment results in Langfuse UI
246+
- name: The experiment name.
247+
- run_name: The experiment run name (equivalent to the dataset run name).
248+
- description: Optional experiment description.
249+
- item_results: Results for each dataset item with outputs and evaluations.
250+
- run_evaluations: Aggregate evaluation results for the entire run.
251+
- dataset_run_id: ID of the created dataset run in Langfuse.
252+
- dataset_run_url: Direct URL to view the experiment results in Langfuse UI.
247253
248254
The result object provides a format() method for human-readable output:
249255
```python
@@ -253,7 +259,7 @@ def run_experiment(
253259
```
254260
255261
Raises:
256-
ValueError: If the dataset has no items or no Langfuse client is available
262+
ValueError: If the dataset has no items or no Langfuse client is available.
257263
258264
Examples:
259265
Basic dataset experiment:
@@ -400,6 +406,7 @@ def content_diversity(*, item_results, **kwargs):
400406

401407
return langfuse_client.run_experiment(
402408
name=name,
409+
run_name=run_name,
403410
description=description,
404411
data=self.items,
405412
task=task,

langfuse/_client/span.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,19 +1468,19 @@ def start_as_current_generation(
14681468
return self.start_as_current_observation(
14691469
name=name,
14701470
as_type="generation",
1471-
input=input,
1472-
output=output,
1473-
metadata=metadata,
1474-
version=version,
1475-
level=level,
1476-
status_message=status_message,
1477-
completion_start_time=completion_start_time,
1478-
model=model,
1479-
model_parameters=model_parameters,
1480-
usage_details=usage_details,
1481-
cost_details=cost_details,
1482-
prompt=prompt,
1483-
)
1471+
input=input,
1472+
output=output,
1473+
metadata=metadata,
1474+
version=version,
1475+
level=level,
1476+
status_message=status_message,
1477+
completion_start_time=completion_start_time,
1478+
model=model,
1479+
model_parameters=model_parameters,
1480+
usage_details=usage_details,
1481+
cost_details=cost_details,
1482+
prompt=prompt,
1483+
)
14841484

14851485
def create_event(
14861486
self,

langfuse/experiment.py

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,9 @@ class Evaluation:
114114
metadata: Optional structured metadata about the evaluation process.
115115
Can include confidence scores, intermediate calculations, model versions,
116116
or any other relevant technical details.
117-
data_type: Optional score data type, required if value is not NUMERIC; one of NUMERIC, CATEGORICAL, or BOOLEAN; default: NUMERIC
118-
config_id: Optional Langfuse score config id
117+
data_type: Optional score data type. Required if value is not NUMERIC.
118+
One of NUMERIC, CATEGORICAL, or BOOLEAN. Defaults to NUMERIC.
119+
config_id: Optional Langfuse score config ID.
119120
120121
Examples:
121122
Basic accuracy evaluation:
@@ -194,12 +195,12 @@ def __init__(
194195
"""Initialize an Evaluation with the provided data.
195196
196197
Args:
197-
name: Unique identifier for the evaluation metric
198-
value: The evaluation score or result
199-
comment: Optional human-readable explanation of the result
200-
metadata: Optional structured metadata about the evaluation process
201-
data_type: Optional score data type (NUMERIC, CATEGORICAL, or BOOLEAN)
202-
config_id: Optional Langfuse score config id
198+
name: Unique identifier for the evaluation metric.
199+
value: The evaluation score or result.
200+
comment: Optional human-readable explanation of the result.
201+
metadata: Optional structured metadata about the evaluation process.
202+
data_type: Optional score data type (NUMERIC, CATEGORICAL, or BOOLEAN).
203+
config_id: Optional Langfuse score config ID.
203204
204205
Note:
205206
All arguments must be provided as keywords. Positional arguments will raise a TypeError.
@@ -276,11 +277,11 @@ def __init__(
276277
"""Initialize an ExperimentItemResult with the provided data.
277278
278279
Args:
279-
item: The original experiment item that was processed
280-
output: The actual output produced by the task function for this item
281-
evaluations: List of evaluation results for this item
282-
trace_id: Optional Langfuse trace ID for this item's execution
283-
dataset_run_id: Optional dataset run ID if this item was part of a Langfuse dataset
280+
item: The original experiment item that was processed.
281+
output: The actual output produced by the task function for this item.
282+
evaluations: List of evaluation results for this item.
283+
trace_id: Optional Langfuse trace ID for this item's execution.
284+
dataset_run_id: Optional dataset run ID if this item was part of a Langfuse dataset.
284285
285286
Note:
286287
All arguments must be provided as keywords. Positional arguments will raise a TypeError.
@@ -300,14 +301,15 @@ class ExperimentResult:
300301
about the experiment execution.
301302
302303
Attributes:
303-
name: The name of the experiment as specified during execution
304-
description: Optional description of the experiment's purpose or methodology
304+
name: The name of the experiment as specified during execution.
305+
run_name: The name of the current experiment run.
306+
description: Optional description of the experiment's purpose or methodology.
305307
item_results: List of results from processing each individual dataset item,
306-
containing the original item, task output, evaluations, and trace information
308+
containing the original item, task output, evaluations, and trace information.
307309
run_evaluations: List of aggregate evaluation results computed across all items,
308-
such as average scores, statistical summaries, or cross-item analyses
309-
dataset_run_id: Optional ID of the dataset run in Langfuse (when using Langfuse datasets)
310-
dataset_run_url: Optional direct URL to view the experiment results in Langfuse UI
310+
such as average scores, statistical summaries, or cross-item analyses.
311+
dataset_run_id: Optional ID of the dataset run in Langfuse (when using Langfuse datasets).
312+
dataset_run_url: Optional direct URL to view the experiment results in Langfuse UI.
311313
312314
Examples:
313315
Basic usage with local dataset:
@@ -360,6 +362,7 @@ def __init__(
360362
self,
361363
*,
362364
name: str,
365+
run_name: str,
363366
description: Optional[str],
364367
item_results: List[ExperimentItemResult],
365368
run_evaluations: List[Evaluation],
@@ -369,14 +372,16 @@ def __init__(
369372
"""Initialize an ExperimentResult with the provided data.
370373
371374
Args:
372-
name: The name of the experiment
373-
description: Optional description of the experiment
374-
item_results: List of results from processing individual dataset items
375-
run_evaluations: List of aggregate evaluation results for the entire run
376-
dataset_run_id: Optional ID of the dataset run (for Langfuse datasets)
377-
dataset_run_url: Optional URL to view results in Langfuse UI
375+
name: The name of the experiment.
376+
run_name: The current experiment run name.
377+
description: Optional description of the experiment.
378+
item_results: List of results from processing individual dataset items.
379+
run_evaluations: List of aggregate evaluation results for the entire run.
380+
dataset_run_id: Optional ID of the dataset run (for Langfuse datasets).
381+
dataset_run_url: Optional URL to view results in Langfuse UI.
378382
"""
379383
self.name = name
384+
self.run_name = run_name
380385
self.description = description
381386
self.item_results = item_results
382387
self.run_evaluations = run_evaluations
@@ -526,7 +531,8 @@ def format(self, *, include_item_results: bool = False) -> str:
526531

527532
# Experiment overview section
528533
output += f"\\n{'─' * 50}\\n"
529-
output += f"📊 {self.name}"
534+
output += f"🧪 Experiment: {self.name}"
535+
output += f"\n📋 Run name: {self.run_name}"
530536
if self.description:
531537
output += f" - {self.description}"
532538

0 commit comments

Comments
 (0)