Skip to content

Commit 4dd3b72

Browse files
sahilds1 authored and TineoC committed
Add server/api/services/test_evals.py
1 parent 8e6bd48 commit 4dd3b72

1 file changed

Lines changed: 53 additions & 0 deletions

File tree

server/api/services/test_evals.py

Lines changed: 53 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,53 @@
1+
2+
from unittest.mock import patch, MagicMock
3+
4+
import pytest
5+
import pandas as pd
6+
7+
from evals import evaluate_response
8+
9+
class MockTokenUsage:
    """Minimal stand-in for the token-usage object a mocked handler returns.

    Only the two attributes assigned below are provided; nothing else is
    emulated.
    """

    def __init__(self, input_tokens: int, output_tokens: int) -> None:
        """Store the given token counts unchanged on the instance."""
        self.input_tokens = input_tokens
        self.output_tokens = output_tokens
13+
14+
@patch("evals.ModelFactory.get_handler")
@patch("evals.Extractiveness.compute")
def test_evaluate_response(mock_extractiveness_compute, mock_get_handler):
    """Verify ``evaluate_response`` assembles a one-row DataFrame from mocked parts.

    ``ModelFactory.get_handler`` and ``Extractiveness.compute`` are patched so
    no real model or metric code runs. The test then checks the frame's shape,
    the output text, the three extractiveness columns, the token-usage columns,
    the computed cost, and the duration.

    Args:
        mock_extractiveness_compute: Mock replacing ``evals.Extractiveness.compute``.
        mock_get_handler: Mock replacing ``evals.ModelFactory.get_handler``.
    """
    # Stacked @patch decorators are applied bottom-up, so the mock for
    # Extractiveness.compute arrives as the first positional argument.

    # Mock BaseModelHandler
    mock_handler = MagicMock()
    mock_handler.handle_request.return_value = (
        "This is a summary.",
        MockTokenUsage(input_tokens=100, output_tokens=50),
        {"input": 15.0, "output": 30.0},  # $15 and $30 per 1M tokens
        1.23,  # duration
    )

    mock_get_handler.return_value = mock_handler

    mock_extractiveness_compute.return_value = {
        "summarization_coverage": 0.8,
        "summarization_density": 1.5,
        "summarization_compression": 2.0,
    }

    df = evaluate_response(
        model_name="mock-model",
        query="What is the summary?",
        context="This is a long article about something important.",
        reference="This is a reference summary.",
    )

    assert isinstance(df, pd.DataFrame)
    assert df.shape == (1, 8)
    assert df["Output Text"].iloc[0] == "This is a summary."
    assert df["Extractiveness Coverage"].iloc[0] == 0.8
    assert df["Extractiveness Density"].iloc[0] == 1.5
    assert df["Extractiveness Compression"].iloc[0] == 2.0
    assert df["Input Token Usage"].iloc[0] == 100
    assert df["Output Token Usage"].iloc[0] == 50

    # Prices are quoted per 1M tokens, so scale each rate by the token count.
    expected_cost = (15.0 / 1_000_000) * 100 + (30.0 / 1_000_000) * 50
    # approx() wraps the expected value, so the tolerance is relative to it and
    # a failure reports the expected number with its tolerance.
    assert df["Cost (USD)"].iloc[0] == pytest.approx(expected_cost, rel=1e-4)
    assert df["Duration (s)"].iloc[0] == pytest.approx(1.23, rel=1e-4)

0 commit comments

Comments
 (0)