@@ -756,6 +756,89 @@ def __init__(
756756 self .error_summary = error_summary
757757 self .has_more_items = has_more_items
758758
def __str__(self) -> str:
    """Return a formatted string representation of the batch evaluation results.

    The report is assembled section by section: banner, status and
    duration, item counts with the overall success rate, score totals,
    per-evaluator statistics, throughput, error counts, and finally notes
    about incomplete or truncated runs. Optional sections are omitted when
    the corresponding data is empty or zero.

    Returns:
        A multi-line string with a summary of the evaluation results.
    """
    banner = "=" * 60
    lines = [banner, "Batch Evaluation Results", banner]

    # Summary statistics
    status = "Completed" if self.completed else "Incomplete"
    lines.append(f"\n Status: {status} ")
    lines.append(f"Duration: {self.duration_seconds:.2f} s")
    lines.append(f"\n Items fetched: {self.total_items_fetched} ")
    lines.append(f"Items processed: {self.total_items_processed} ")

    if self.total_items_failed > 0:
        lines.append(f"Items failed: {self.total_items_failed} ")

    # Overall success rate; skipped when nothing was fetched so the
    # division below can never hit a zero denominator.
    if self.total_items_fetched > 0:
        item_success_rate = (
            self.total_items_processed / self.total_items_fetched * 100
        )
        lines.append(f"Success rate: {item_success_rate:.1f} %")

    # Scores created
    lines.append(f"\n Scores created: {self.total_scores_created} ")
    if self.total_composite_scores_created > 0:
        lines.append(f"Composite scores: {self.total_composite_scores_created} ")

    total_scores = self.total_scores_created + self.total_composite_scores_created
    lines.append(f"Total scores: {total_scores} ")

    # Evaluator statistics
    if self.evaluator_stats:
        lines.append("\n Evaluator Performance:")
        for stats in self.evaluator_stats:
            lines.append(f" {stats.name} :")
            # Evaluators that never ran only get their name line; the guard
            # also makes the division safe, so no inner fallback is needed.
            if stats.total_runs > 0:
                run_success_rate = stats.successful_runs / stats.total_runs * 100
                lines.append(
                    f" Runs: {stats.successful_runs} /{stats.total_runs} "
                    f"({run_success_rate:.1f} % success)"
                )
                lines.append(f" Scores created: {stats.total_scores_created} ")
                if stats.failed_runs > 0:
                    lines.append(f" Failed runs: {stats.failed_runs} ")

    # Performance metrics; both operands must be positive for the ratios
    # below to be defined.
    if self.total_items_processed > 0 and self.duration_seconds > 0:
        items_per_sec = self.total_items_processed / self.duration_seconds
        lines.append("\n Performance:")
        lines.append(f" Throughput: {items_per_sec:.2f} items/second")
        if self.total_scores_created > 0:
            avg_scores = self.total_scores_created / self.total_items_processed
            lines.append(f" Avg scores per item: {avg_scores:.2f} ")

    # Errors and warnings
    if self.error_summary:
        lines.append("\n Errors encountered:")
        for error_type, count in self.error_summary.items():
            lines.append(f" {error_type} : {count} ")

    # Incomplete run information
    if not self.completed:
        lines.append("\n Warning: Evaluation incomplete")
        resume_token = self.resume_token
        if resume_token:
            lines.append(
                f" Last processed: {resume_token.last_processed_timestamp} "
            )
            lines.append(f" Items processed: {resume_token.items_processed} ")
            lines.append(" Use resume_from parameter to continue")

    if self.has_more_items:
        lines.append("\n Note: More items available beyond max_items limit")

    lines.append(banner)
    return "\n ".join(lines)
841+
759842
760843class BatchEvaluationRunner :
761844 """Handles batch evaluation execution for a Langfuse client.
0 commit comments