@@ -2427,6 +2427,7 @@ def run_experiment(
24272427 - run_name: The experiment run name. This is equal to the dataset run name if the experiment was run on a Langfuse dataset.
24282428 - item_results: List of results for each processed item with outputs and evaluations
24292429 - run_evaluations: List of aggregate evaluation results for the entire run
2430+ - experiment_id: Stable identifier for the experiment run across all items
24302431 - dataset_run_id: ID of the dataset run (if using Langfuse datasets)
24312432 - dataset_run_url: Direct URL to view results in Langfuse UI (if applicable)
24322433
@@ -2577,6 +2578,8 @@ async def _run_experiment_async(
25772578 f"Starting experiment '{ name } ' run '{ run_name } ' with { len (data )} items"
25782579 )
25792580
2581+ shared_fallback_experiment_id = self ._create_observation_id ()
2582+
25802583 # Set up concurrency control
25812584 semaphore = asyncio .Semaphore (max_concurrency )
25822585
@@ -2588,6 +2591,7 @@ async def process_item(item: ExperimentItem) -> ExperimentItemResult:
25882591 task ,
25892592 evaluators ,
25902593 composite_evaluator ,
2594+ shared_fallback_experiment_id ,
25912595 name ,
25922596 run_name ,
25932597 description ,
@@ -2619,7 +2623,14 @@ async def process_item(item: ExperimentItem) -> ExperimentItemResult:
26192623 langfuse_logger .error (f"Run evaluator failed: { e } " )
26202624
26212625 # Generate dataset run URL if applicable
2622- dataset_run_id = valid_results [0 ].dataset_run_id if valid_results else None
2626+ dataset_run_id = next (
2627+ (
2628+ result .dataset_run_id
2629+ for result in valid_results
2630+ if result .dataset_run_id
2631+ ),
2632+ None ,
2633+ )
26232634 dataset_run_url = None
26242635 if dataset_run_id and data :
26252636 try :
@@ -2665,6 +2676,7 @@ async def process_item(item: ExperimentItem) -> ExperimentItemResult:
26652676 description = description ,
26662677 item_results = valid_results ,
26672678 run_evaluations = run_evaluations ,
2679+ experiment_id = dataset_run_id or shared_fallback_experiment_id ,
26682680 dataset_run_id = dataset_run_id ,
26692681 dataset_run_url = dataset_run_url ,
26702682 )
@@ -2675,6 +2687,7 @@ async def _process_experiment_item(
26752687 task : Callable ,
26762688 evaluators : List [Callable ],
26772689 composite_evaluator : Optional [CompositeEvaluatorFunction ],
2690+ fallback_experiment_id : str ,
26782691 experiment_name : str ,
26792692 experiment_run_name : str ,
26802693 experiment_description : Optional [str ],
@@ -2753,7 +2766,7 @@ async def _process_experiment_item(
27532766 if isinstance (item_metadata , dict ):
27542767 final_observation_metadata .update (item_metadata )
27552768
2756- experiment_id = dataset_run_id or self . _create_observation_id ()
2769+ experiment_id = dataset_run_id or fallback_experiment_id
27572770 experiment_item_id = (
27582771 dataset_item_id or get_sha256_hash_hex (_serialize (input_data ))[:16 ]
27592772 )
0 commit comments