Skip to content

Commit 46e9969

Browse files
committed
Add build_query tests and document coverage gaps in embedding_services
1 parent 3824d81 commit 46e9969

2 files changed

Lines changed: 201 additions & 2 deletions

File tree

server/api/services/embedding_services.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import logging
33
from statistics import median
44

5+
# filter() only does AND logic; Q objects are needed to express OR
56
from django.db.models import Q
67
from pgvector.django import L2Distance
78

server/api/services/test_embedding_services.py

Lines changed: 200 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,175 @@
11
from unittest.mock import MagicMock, patch
22

3-
from api.services.embedding_services import evaluate_query, log_usage
3+
from django.db.models import Q
44

5+
from api.services.embedding_services import build_query, evaluate_query, log_usage
6+
7+
# ---------------------------------------------------------------------------
8+
# build_query tests
9+
#
10+
# build_query only constructs a lazy Django QuerySet — it never evaluates it
11+
# (no iteration, .get(), .exists(), etc.), so no database is needed.
12+
#
13+
# We patch Embeddings.objects so every chained ORM call (.filter, .annotate,
14+
# .order_by, __getitem__) returns a MagicMock instead of hitting the DB.
15+
# All assertions inspect which methods were called with which arguments.
16+
# ---------------------------------------------------------------------------
17+
18+
# Placeholder vector: build_query only forwards it to L2Distance, so the values are arbitrary.
19+
EMBEDDING_VECTOR = [0.1, 0.2, 0.3]
20+
21+
# Test authenticated/unauthenticated user access control
22+
23+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_authenticated_uses_or_filter(mock_objects):
    # A logged-in caller may see documents they uploaded themselves OR documents
    # uploaded by any superuser, so the first filter must be an OR of two Q's.
    requester = MagicMock(is_authenticated=True)

    build_query(requester, EMBEDDING_VECTOR)

    # Q objects compare by value in plain Python, so no database is involved.
    filter_args = mock_objects.filter.call_args.args
    own_uploads = Q(upload_file__uploaded_by=requester)
    superuser_uploads = Q(upload_file__uploaded_by__is_superuser=True)
    assert filter_args[0] == own_uploads | superuser_uploads
35+
36+
37+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_unauthenticated_uses_superuser_only_filter(mock_objects):
    # Anonymous callers are limited to superuser uploads: the filter must be the
    # single superuser condition, with no OR branch for the caller's own files.
    anonymous = MagicMock(is_authenticated=False)

    build_query(anonymous, EMBEDDING_VECTOR)

    filter_args = mock_objects.filter.call_args.args
    assert filter_args[0] == Q(upload_file__uploaded_by__is_superuser=True)
48+
49+
# Test application of annotate and order_by
50+
51+
# TODO: Strengthen test_build_query_annotates_and_orders_by_distance to also
52+
# assert the *arguments* to annotate — specifically that it receives
53+
# distance=L2Distance("embedding_sentence_transformers", EMBEDDING_VECTOR).
54+
# Currently only the call count is checked, so a wrong field name or a
55+
# dropped vector would go undetected.
56+
57+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_annotates_and_orders_by_distance(mock_objects):
    # Regardless of other arguments, annotate(distance=L2Distance(...)) and
    # order_by("distance") must always be applied to the queryset.
    user = MagicMock(is_authenticated=True)

    build_query(user, EMBEDDING_VECTOR)

    # Retrieve the mock chain that .filter() returned, then check its methods.
    filtered_qs = mock_objects.filter.return_value
    filtered_qs.annotate.assert_called_once()

    # The annotation has to be registered under the same name order_by sorts
    # on; a renamed keyword would otherwise slip through. The L2Distance
    # expression passed as the value is not inspected here.
    assert "distance" in filtered_qs.annotate.call_args.kwargs

    filtered_qs.annotate.return_value.order_by.assert_called_once_with("distance")
69+
70+
# Test guid-over-document precedence logic
71+
72+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_no_document_filter_when_both_none(mock_objects):
    # When neither guid nor document_name is provided, only the access-control
    # filter should fire — no secondary filter call for a document.
    user = MagicMock(is_authenticated=True)

    build_query(user, EMBEDDING_VECTOR, document_name=None, guid=None)

    # Exactly one manager-level filter call: the auth/access-control filter.
    assert mock_objects.filter.call_count == 1

    # A document filter would be chained onto the queryset produced by
    # .annotate().order_by(), which is a separate mock from
    # mock_objects.filter. The count above cannot see it, so check that
    # chained mock explicitly.
    ordered_qs = mock_objects.filter.return_value.annotate.return_value.order_by.return_value
    ordered_qs.filter.assert_not_called()
82+
83+
84+
85+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_guid_takes_precedence_over_document_name(mock_objects):
    # When both guid and document_name are provided, the guid branch runs and
    # the document_name branch is skipped entirely.
    user = MagicMock(is_authenticated=True)

    build_query(user, EMBEDDING_VECTOR, guid="abc-123", document_name="study.pdf")

    # Only the access-control filter is called on the manager itself. The
    # document filter is chained onto the queryset returned by
    # .annotate().order_by(), which is a different mock, so it does not
    # contribute to this call count.
    assert mock_objects.filter.call_count == 1

    # The chained filter must use upload_file__guid, not name. Asserting a
    # single call with the guid keyword also proves that no additional
    # document_name filter was applied to the ordered queryset.
    ordered_qs = mock_objects.filter.return_value.annotate.return_value.order_by.return_value
    ordered_qs.filter.assert_called_once_with(upload_file__guid="abc-123")
100+
101+
102+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_guid_filter_applied(mock_objects):
    # Supplying only a guid must narrow the ordered queryset by upload_file__guid.
    requester = MagicMock(is_authenticated=True)

    build_query(requester, EMBEDDING_VECTOR, guid="doc-guid-456")

    # Walk the mock chain step by step: filter -> annotate -> order_by.
    access_filtered = mock_objects.filter.return_value
    annotated = access_filtered.annotate.return_value
    by_distance = annotated.order_by.return_value
    by_distance.filter.assert_called_once_with(upload_file__guid="doc-guid-456")
111+
112+
113+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_document_name_filter_applied(mock_objects):
    # With a document_name and no guid, the ordered queryset is narrowed by
    # name rather than by upload_file__guid.
    requester = MagicMock(is_authenticated=True)

    build_query(requester, EMBEDDING_VECTOR, document_name="study.pdf", guid=None)

    # Walk the mock chain step by step: filter -> annotate -> order_by.
    access_filtered = mock_objects.filter.return_value
    annotated = access_filtered.annotate.return_value
    by_distance = annotated.order_by.return_value
    by_distance.filter.assert_called_once_with(name="study.pdf")
123+
124+
125+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_empty_string_guid_falls_back_to_document_name(mock_objects):
    # guid="" is falsy, so it must not select the guid branch; the
    # document_name filter is expected instead. This protects callers that
    # forward an unset form field as an empty string.
    requester = MagicMock(is_authenticated=True)

    build_query(requester, EMBEDDING_VECTOR, guid="", document_name="fallback.pdf")

    # Walk the mock chain step by step: filter -> annotate -> order_by.
    access_filtered = mock_objects.filter.return_value
    annotated = access_filtered.annotate.return_value
    by_distance = annotated.order_by.return_value
    by_distance.filter.assert_called_once_with(name="fallback.pdf")
136+
137+
# Cover LIMIT slicing
138+
139+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_respects_num_results(mock_objects):
    # num_results becomes the SQL LIMIT through queryset slicing, so a
    # non-default value has to reach the __getitem__ call unchanged.
    requester = MagicMock(is_authenticated=True)

    build_query(requester, EMBEDDING_VECTOR, num_results=5)

    # Python turns qs[:5] into qs.__getitem__(slice(None, 5, None)).
    expected_slice = slice(None, 5, None)
    access_filtered = mock_objects.filter.return_value
    by_distance = access_filtered.annotate.return_value.order_by.return_value
    by_distance.__getitem__.assert_called_once_with(expected_slice)
150+
151+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_returns_unevaluated_queryset(mock_objects):
    # build_query must hand back the sliced queryset itself, not a
    # materialised list: the result should be exactly what the final
    # __getitem__ call on the ordered queryset produced.
    requester = MagicMock(is_authenticated=True)

    returned = build_query(requester, EMBEDDING_VECTOR)

    access_filtered = mock_objects.filter.return_value
    by_distance = access_filtered.annotate.return_value.order_by.return_value
    sliced = by_distance.__getitem__.return_value
    assert returned is sliced
    assert not isinstance(returned, list)
162+
163+
164+
# ---------------------------------------------------------------------------
165+
# evaluate_query tests
166+
# ---------------------------------------------------------------------------
167+
168+
# TODO: Add test for empty queryset — evaluate_query([]) should return [].
5169

6170
def test_evaluate_query_maps_fields():
171+
# Verify that each Embeddings model attribute is mapped to the correct
172+
# output dict key. Note the rename: obj.page_num -> result["page_number"].
7173
obj = MagicMock()
8174
obj.name = "doc.pdf"
9175
obj.text = "some text"
@@ -27,6 +193,8 @@ def test_evaluate_query_maps_fields():
27193

28194

29195
def test_evaluate_query_none_upload_file():
196+
# When upload_file is None (e.g. the FK was deleted), file_id must be None
197+
# rather than raising an AttributeError on None.guid.
30198
obj = MagicMock()
31199
obj.name = "doc.pdf"
32200
obj.text = "some text"
@@ -39,9 +207,26 @@ def test_evaluate_query_none_upload_file():
39207

40208
assert results[0]["file_id"] is None
41209

210+
# ---------------------------------------------------------------------------
211+
# log_usage tests
212+
# ---------------------------------------------------------------------------
213+
214+
# TODO: Add test for empty results list — log_usage([]) hits the else branch and
215+
# should call SemanticSearchUsage.objects.create with num_results_returned=0
216+
# and max_distance=None, median_distance=None, min_distance=None.
217+
218+
# TODO: Add test for unauthenticated user — user.is_authenticated=False should
219+
# result in user=None being stored in the SemanticSearchUsage record.
220+
221+
# TODO: Add test for user=None — passing None directly as the user argument
222+
# should also store user=None (the expression `user if (user and
223+
# user.is_authenticated) else None` handles both cases, but only the
224+
# authenticated path is currently exercised).
42225

43226
@patch("api.services.embedding_services.SemanticSearchUsage.objects.create")
44227
def test_log_usage_computes_distance_stats(mock_create):
228+
# Verify min, max, and median are computed correctly from the distance
229+
# values in the results list and forwarded to the DB record.
45230
results = [{"distance": 1.0}, {"distance": 3.0}, {"distance": 2.0}]
46231
user = MagicMock(is_authenticated=True)
47232

@@ -69,10 +254,12 @@ def test_log_usage_computes_distance_stats(mock_create):
69254
side_effect=Exception("DB error"),
70255
)
71256
def test_log_usage_swallows_exceptions(mock_create):
257+
# log_usage must not propagate exceptions — a logging failure should never
258+
# interrupt the caller's search flow.
259+
# pytest fails the test if it catches unhandled Exception
72260
results = [{"distance": 1.0}]
73261
user = MagicMock(is_authenticated=True)
74262

75-
# pytest fails the test if it catches unhandled Exception
76263
log_usage(
77264
results,
78265
message_data="test query",
@@ -83,3 +270,14 @@ def test_log_usage_swallows_exceptions(mock_create):
83270
encoding_time=0.1,
84271
db_query_time=0.2,
85272
)
273+
274+
275+
# ---------------------------------------------------------------------------
276+
# get_closest_embeddings tests
277+
# ---------------------------------------------------------------------------
278+
279+
# TODO: Add smoke test for get_closest_embeddings verifying the wiring between
280+
# its four steps: encode → build_query → evaluate_query → log_usage.
281+
# Patch TransformerModel.get_instance, build_query, evaluate_query, and
282+
# log_usage. Assert that evaluate_query receives the queryset returned by
283+
# build_query, and that the function returns evaluate_query's result.

0 commit comments

Comments
 (0)