Skip to content

Commit f6ab661

Browse files
committed
add str method
1 parent 9902def commit f6ab661

1 file changed

Lines changed: 83 additions & 0 deletions

File tree

langfuse/batch_evaluation.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,89 @@ def __init__(
756756
self.error_summary = error_summary
757757
self.has_more_items = has_more_items
758758

759+
def __str__(self) -> str:
    """Return a formatted string representation of the batch evaluation results.

    The report always contains a banner, the completion status, the
    duration, the fetched/processed item counts and the score totals.
    Further sections are added only when they have something to show:
    failed items, overall success rate, composite scores, per-evaluator
    statistics, throughput, the error summary, resume information for an
    incomplete run, and a note when more items remain.

    Returns:
        A multi-line string with a summary of the evaluation results.
    """
    banner = "=" * 60
    lines = [
        banner,
        "Batch Evaluation Results",
        banner,
        # Summary statistics
        f"\nStatus: {'Completed' if self.completed else 'Incomplete'}",
        f"Duration: {self.duration_seconds:.2f}s",
        f"\nItems fetched: {self.total_items_fetched}",
        f"Items processed: {self.total_items_processed}",
    ]

    if self.total_items_failed > 0:
        lines.append(f"Items failed: {self.total_items_failed}")

    # Overall success rate; skipped when nothing was fetched to avoid
    # dividing by zero.
    if self.total_items_fetched > 0:
        overall_rate = self.total_items_processed / self.total_items_fetched * 100
        lines.append(f"Success rate: {overall_rate:.1f}%")

    # Scores created
    lines.append(f"\nScores created: {self.total_scores_created}")
    if self.total_composite_scores_created > 0:
        lines.append(f"Composite scores: {self.total_composite_scores_created}")

    total_scores = self.total_scores_created + self.total_composite_scores_created
    lines.append(f"Total scores: {total_scores}")

    # Evaluator statistics
    # NOTE(review): detail lines keep the leading whitespace exactly as found
    # in the reviewed source; confirm the intended indent width.
    if self.evaluator_stats:
        lines.append("\nEvaluator Performance:")
        for stats in self.evaluator_stats:
            lines.append(f" {stats.name}:")
            # Evaluators that never ran only get their name line.
            if stats.total_runs > 0:
                # The guard above already rules out a zero denominator, so
                # no fallback value is needed here.
                run_rate = stats.successful_runs / stats.total_runs * 100
                lines.append(
                    f" Runs: {stats.successful_runs}/{stats.total_runs} "
                    f"({run_rate:.1f}% success)"
                )
                lines.append(f" Scores created: {stats.total_scores_created}")
                if stats.failed_runs > 0:
                    lines.append(f" Failed runs: {stats.failed_runs}")

    # Performance metrics; both values are used as denominators below, so
    # the section is emitted only when each is positive.
    if self.total_items_processed > 0 and self.duration_seconds > 0:
        items_per_sec = self.total_items_processed / self.duration_seconds
        lines.append("\nPerformance:")
        lines.append(f" Throughput: {items_per_sec:.2f} items/second")
        if self.total_scores_created > 0:
            avg_scores = self.total_scores_created / self.total_items_processed
            lines.append(f" Avg scores per item: {avg_scores:.2f}")

    # Errors and warnings
    if self.error_summary:
        lines.append("\nErrors encountered:")
        lines.extend(
            f" {error_type}: {count}"
            for error_type, count in self.error_summary.items()
        )

    # Incomplete run information
    if not self.completed:
        lines.append("\nWarning: Evaluation incomplete")
        if self.resume_token:
            lines.append(
                f" Last processed: {self.resume_token.last_processed_timestamp}"
            )
            lines.append(f" Items processed: {self.resume_token.items_processed}")
            lines.append(" Use resume_from parameter to continue")

    if self.has_more_items:
        lines.append("\nNote: More items available beyond max_items limit")

    lines.append(banner)
    return "\n".join(lines)
841+
759842

760843
class BatchEvaluationRunner:
761844
"""Handles batch evaluation execution for a Langfuse client.

0 commit comments

Comments
 (0)