@@ -2886,95 +2886,96 @@ async def _process_experiment_item(
28862886 )
28872887 raise e
28882888
2889- # Run evaluators
2890- evaluations = []
2889+ # Run evaluators
2890+ evaluations = []
28912891
2892- for evaluator in evaluators :
2893- try :
2894- eval_metadata : Optional [Dict [str , Any ]] = None
2892+ for evaluator in evaluators :
2893+ try :
2894+ eval_metadata : Optional [Dict [str , Any ]] = None
28952895
2896- if isinstance (item , dict ):
2897- eval_metadata = item .get ("metadata" )
2898- elif hasattr (item , "metadata" ):
2899- eval_metadata = item .metadata
2896+ if isinstance (item , dict ):
2897+ eval_metadata = item .get ("metadata" )
2898+ elif hasattr (item , "metadata" ):
2899+ eval_metadata = item .metadata
29002900
2901- eval_results = await _run_evaluator (
2902- evaluator ,
2903- input = input_data ,
2904- output = output ,
2905- expected_output = expected_output ,
2906- metadata = eval_metadata ,
2901+ eval_results = await _run_evaluator (
2902+ evaluator ,
2903+ input = input_data ,
2904+ output = output ,
2905+ expected_output = expected_output ,
2906+ metadata = eval_metadata ,
2907+ )
2908+ evaluations .extend (eval_results )
2909+
2910+ # Store evaluations as scores
2911+ for evaluation in eval_results :
2912+ self .create_score (
2913+ trace_id = trace_id ,
2914+ observation_id = span .id ,
2915+ name = evaluation .name ,
2916+ value = evaluation .value , # type: ignore
2917+ comment = evaluation .comment ,
2918+ metadata = evaluation .metadata ,
2919+ config_id = evaluation .config_id ,
2920+ data_type = evaluation .data_type , # type: ignore
29072921 )
2908- evaluations .extend (eval_results )
2909-
2910- # Store evaluations as scores
2911- for evaluation in eval_results :
2912- self .create_score (
2913- trace_id = trace_id ,
2914- observation_id = span .id ,
2915- name = evaluation .name ,
2916- value = evaluation .value , # type: ignore
2917- comment = evaluation .comment ,
2918- metadata = evaluation .metadata ,
2919- config_id = evaluation .config_id ,
2920- data_type = evaluation .data_type , # type: ignore
2921- )
29222922
2923- except Exception as e :
2924- langfuse_logger .error (f"Evaluator failed: { e } " )
2923+ except Exception as e :
2924+ langfuse_logger .error (f"Evaluator failed: { e } " )
29252925
2926- # Run composite evaluator if provided and we have evaluations
2927- if composite_evaluator and evaluations :
2928- try :
2929- composite_eval_metadata : Optional [Dict [str , Any ]] = None
2930- if isinstance (item , dict ):
2931- composite_eval_metadata = item .get ("metadata" )
2932- elif hasattr (item , "metadata" ):
2933- composite_eval_metadata = item .metadata
29342926
2935- result = composite_evaluator (
2936- input = input_data ,
2937- output = output ,
2938- expected_output = expected_output ,
2939- metadata = composite_eval_metadata ,
2940- evaluations = evaluations ,
2941- )
2927+ # Run composite evaluator if provided and we have evaluations
2928+ if composite_evaluator and evaluations :
2929+ try :
2930+ composite_eval_metadata : Optional [Dict [str , Any ]] = None
2931+ if isinstance (item , dict ):
2932+ composite_eval_metadata = item .get ("metadata" )
2933+ elif hasattr (item , "metadata" ):
2934+ composite_eval_metadata = item .metadata
29422935
2943- # Handle async composite evaluators
2944- if asyncio .iscoroutine (result ):
2945- result = await result
2946-
2947- # Normalize to list
2948- composite_evals : List [Evaluation ] = []
2949- if isinstance (result , (dict , Evaluation )):
2950- composite_evals = [result ] # type: ignore
2951- elif isinstance (result , list ):
2952- composite_evals = result # type: ignore
2953-
2954- # Store composite evaluations as scores and add to evaluations list
2955- for composite_evaluation in composite_evals :
2956- self .create_score (
2957- trace_id = trace_id ,
2958- observation_id = span .id ,
2959- name = composite_evaluation .name ,
2960- value = composite_evaluation .value , # type: ignore
2961- comment = composite_evaluation .comment ,
2962- metadata = composite_evaluation .metadata ,
2963- config_id = composite_evaluation .config_id ,
2964- data_type = composite_evaluation .data_type , # type: ignore
2965- )
2966- evaluations .append (composite_evaluation )
2936+ result = composite_evaluator (
2937+ input = input_data ,
2938+ output = output ,
2939+ expected_output = expected_output ,
2940+ metadata = composite_eval_metadata ,
2941+ evaluations = evaluations ,
2942+ )
29672943
2968- except Exception as e :
2969- langfuse_logger .error (f"Composite evaluator failed: { e } " )
2944+ # Handle async composite evaluators
2945+ if asyncio .iscoroutine (result ):
2946+ result = await result
29702947
2971- return ExperimentItemResult (
2972- item = item ,
2973- output = output ,
2974- evaluations = evaluations ,
2975- trace_id = trace_id ,
2976- dataset_run_id = dataset_run_id ,
2977- )
2948+ # Normalize to list
2949+ composite_evals : List [Evaluation ] = []
2950+ if isinstance (result , (dict , Evaluation )):
2951+ composite_evals = [result ] # type: ignore
2952+ elif isinstance (result , list ):
2953+ composite_evals = result # type: ignore
2954+
2955+ # Store composite evaluations as scores and add to evaluations list
2956+ for composite_evaluation in composite_evals :
2957+ self .create_score (
2958+ trace_id = trace_id ,
2959+ observation_id = span .id ,
2960+ name = composite_evaluation .name ,
2961+ value = composite_evaluation .value , # type: ignore
2962+ comment = composite_evaluation .comment ,
2963+ metadata = composite_evaluation .metadata ,
2964+ config_id = composite_evaluation .config_id ,
2965+ data_type = composite_evaluation .data_type , # type: ignore
2966+ )
2967+ evaluations .append (composite_evaluation )
2968+
2969+ except Exception as e :
2970+ langfuse_logger .error (f"Composite evaluator failed: { e } " )
2971+
2972+ return ExperimentItemResult (
2973+ item = item ,
2974+ output = output ,
2975+ evaluations = evaluations ,
2976+ trace_id = trace_id ,
2977+ dataset_run_id = dataset_run_id ,
2978+ )
29782979
29792980 def _create_experiment_run_name (
29802981 self , * , name : Optional [str ] = None , run_name : Optional [str ] = None
0 commit comments