1+
2+ from unittest .mock import patch , MagicMock
3+
4+ import pytest
5+ import pandas as pd
6+
7+ from evals import evaluate_response
8+
class MockTokenUsage:
    """Lightweight stand-in for a token-usage object in this test module.

    Instances expose exactly two attributes, ``input_tokens`` and
    ``output_tokens``, holding whatever values were passed to the constructor.
    """

    def __init__(self, input_tokens, output_tokens):
        # Values are stored as given; no validation or conversion is applied.
        self.input_tokens, self.output_tokens = input_tokens, output_tokens
13+
@patch("evals.ModelFactory.get_handler")
@patch("evals.Extractiveness.compute")
def test_evaluate_response(mock_extractiveness_compute, mock_get_handler):
    """Verify the one-row DataFrame built by ``evaluate_response``.

    ``evals.ModelFactory.get_handler`` and ``evals.Extractiveness.compute``
    are replaced with mocks whose return values are fixed below. The
    assertions then pin the frame's shape, output text, extractiveness
    values, token counts, cost and duration.
    """
    # Stacked ``patch`` decorators inject their mocks bottom-up, which is why
    # the ``Extractiveness.compute`` mock is the first parameter.

    # Mock BaseModelHandler: handle_request yields
    # (output text, token usage, pricing, duration).
    mock_handler = MagicMock()
    mock_handler.handle_request.return_value = (
        "This is a summary.",
        MockTokenUsage(input_tokens=100, output_tokens=50),
        {"input": 15.0, "output": 30.0},  # $15 and $30 per 1M tokens
        1.23,  # duration
    )
    mock_get_handler.return_value = mock_handler

    mock_extractiveness_compute.return_value = {
        "summarization_coverage": 0.8,
        "summarization_density": 1.5,
        "summarization_compression": 2.0,
    }

    df = evaluate_response(
        model_name="mock-model",
        query="What is the summary?",
        context="This is a long article about something important.",
        reference="This is a reference summary.",
    )

    assert isinstance(df, pd.DataFrame)
    assert df.shape == (1, 8)
    assert df["Output Text"].iloc[0] == "This is a summary."
    assert df["Extractiveness Coverage"].iloc[0] == 0.8
    assert df["Extractiveness Density"].iloc[0] == 1.5
    assert df["Extractiveness Compression"].iloc[0] == 2.0
    assert df["Input Token Usage"].iloc[0] == 100
    assert df["Output Token Usage"].iloc[0] == 50

    # Cost = per-token price (quoted per 1M tokens) times the token counts.
    expected_cost = (15.0 / 1_000_000) * 100 + (30.0 / 1_000_000) * 50

    # ``approx`` wraps the expected value so the tolerance is relative to the
    # reference number and failure messages show "actual == expected ± tol".
    assert df["Cost (USD)"].iloc[0] == pytest.approx(expected_cost, rel=1e-4)
    assert df["Duration (s)"].iloc[0] == pytest.approx(1.23, rel=1e-4)
0 commit comments