11from unittest .mock import MagicMock , patch
22
3- from api . services . embedding_services import evaluate_query , log_usage
3+ from django . db . models import Q
44
5+ from api .services .embedding_services import build_query , evaluate_query , log_usage
6+
7+ # ---------------------------------------------------------------------------
8+ # build_query tests
9+ #
10+ # build_query only constructs a lazy Django QuerySet — it never evaluates it
11+ # (no iteration, .get(), .exists(), etc.), so no database is needed.
12+ #
13+ # We patch Embeddings.objects so every chained ORM call (.filter, .annotate,
14+ # .order_by, __getitem__) returns a MagicMock instead of hitting the DB.
15+ # All assertions inspect which methods were called with which arguments.
16+ # ---------------------------------------------------------------------------
17+
# Fixed sample embedding shared by the build_query tests below. The values are
# arbitrary: the tests hand the list to build_query unchanged and never compute
# with it (per the notes in this file it is only forwarded to L2Distance).
EMBEDDING_VECTOR = [0.1, 0.2, 0.3]
20+
21+ # Test authenticated/unauthenticated user access control
22+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_authenticated_uses_or_filter(mock_objects):
    """Authenticated users get an OR filter: own uploads or superuser uploads.

    ``Embeddings.objects`` is replaced by a MagicMock, so the test only
    inspects the first positional argument of the most recent
    ``Embeddings.objects.filter`` call.
    """
    requester = MagicMock()
    requester.is_authenticated = True

    build_query(requester, EMBEDDING_VECTOR)

    # Q objects compare by value, so the expected expression can be rebuilt
    # here and matched against the captured one without a database.
    own_uploads = Q(upload_file__uploaded_by=requester)
    superuser_uploads = Q(upload_file__uploaded_by__is_superuser=True)
    filter_call = mock_objects.filter.call_args
    assert filter_call.args[0] == (own_uploads | superuser_uploads)
35+
36+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_unauthenticated_uses_superuser_only_filter(mock_objects):
    """Unauthenticated users are restricted to superuser uploads.

    The captured filter expression must equal the bare superuser Q object,
    i.e. it must not be OR-ed with a clause for the user's own files.
    """
    anonymous = MagicMock()
    anonymous.is_authenticated = False

    build_query(anonymous, EMBEDDING_VECTOR)

    superuser_only = Q(upload_file__uploaded_by__is_superuser=True)
    filter_call = mock_objects.filter.call_args
    assert filter_call.args[0] == superuser_only
48+
49+ # Test application of annotate and order_by
50+
# TODO: Strengthen test_build_query_annotates_and_orders_by_distance further by
# asserting the *value* of the distance annotation — specifically that it is
# L2Distance("embedding_sentence_transformers", EMBEDDING_VECTOR).
# The test pins the annotation name but not the expression, so a wrong field
# name or a dropped vector would still go undetected.


@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_annotates_and_orders_by_distance(mock_objects):
    """The queryset is annotated with ``distance`` and ordered by it.

    Checks the mock chain returned by ``Embeddings.objects.filter``:
    ``annotate`` must be called exactly once with a ``distance`` keyword, and
    ``order_by("distance")`` must be called exactly once on its result.
    """
    user = MagicMock(is_authenticated=True)

    build_query(user, EMBEDDING_VECTOR)

    # Retrieve the mock chain that .filter() returned, then check its methods.
    filtered_qs = mock_objects.filter.return_value
    filtered_qs.annotate.assert_called_once()

    # order_by("distance") only makes sense if the annotation carries that
    # exact name, so pin the keyword rather than just the call count.
    assert "distance" in filtered_qs.annotate.call_args.kwargs

    filtered_qs.annotate.return_value.order_by.assert_called_once_with("distance")
69+
70+ # Test guid-over-document precedence logic
71+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_no_document_filter_when_both_none(mock_objects):
    """No guid and no document_name means no secondary filter is applied.

    Only the access-control filter on ``Embeddings.objects`` should fire; the
    ordered queryset must reach the slice step without another ``filter``.
    """
    user = MagicMock(is_authenticated=True)

    build_query(user, EMBEDDING_VECTOR, document_name=None, guid=None)

    # Exactly one filter call on the manager: the auth/access-control filter.
    assert mock_objects.filter.call_count == 1

    # The guid / document_name filters are issued on the queryset returned by
    # .annotate().order_by() (see the sibling tests), which is a different
    # mock from mock_objects.filter. The count above cannot observe them, so
    # check that chained mock explicitly.
    ordered_qs = (
        mock_objects.filter.return_value.annotate.return_value.order_by.return_value
    )
    ordered_qs.filter.assert_not_called()
82+
83+
84+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_guid_takes_precedence_over_document_name(mock_objects):
    """With both guid and document_name given, only the guid filter is applied.

    Exactly two ``filter`` calls are expected across the whole ORM chain: the
    access-control filter on the manager, then the ``upload_file__guid``
    filter on the ordered queryset. No ``name`` filter may follow.
    """
    user = MagicMock(is_authenticated=True)

    build_query(user, EMBEDDING_VECTOR, guid="abc-123", document_name="study.pdf")

    # mock_objects.filter.call_count only counts calls made directly on the
    # manager; the guid filter is issued on the chained queryset mock, which
    # is a separate MagicMock. mock_calls records calls on the root mock *and*
    # on every return-value mock, with dotted names such as
    # "filter().annotate().order_by().filter", so count filter calls there.
    filter_calls = [
        name
        for name, _args, _kwargs in mock_objects.mock_calls
        if name.rsplit(".", 1)[-1] == "filter"
    ]
    # Two calls: auth filter + guid filter. No third call for document_name.
    assert len(filter_calls) == 2

    # The second filter must use upload_file__guid, not name.
    # Follow the mock chain to the queryset that .annotate().order_by() returned.
    ordered_qs = (
        mock_objects.filter.return_value.annotate.return_value.order_by.return_value
    )
    ordered_qs.filter.assert_called_once_with(upload_file__guid="abc-123")
100+
101+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_guid_filter_applied(mock_objects):
    """A guid on its own adds a follow-up filter on ``upload_file__guid``."""
    auth_user = MagicMock(is_authenticated=True)

    build_query(auth_user, EMBEDDING_VECTOR, guid="doc-guid-456")

    # Walk the mock chain step by step: filter -> annotate -> order_by.
    annotated_qs = mock_objects.filter.return_value.annotate.return_value
    distance_sorted_qs = annotated_qs.order_by.return_value
    distance_sorted_qs.filter.assert_called_once_with(
        upload_file__guid="doc-guid-456"
    )
111+
112+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_document_name_filter_applied(mock_objects):
    """With guid=None, document_name adds a follow-up filter on ``name``.

    The filter must be keyed on ``name`` rather than ``upload_file__guid``.
    """
    auth_user = MagicMock(is_authenticated=True)

    build_query(auth_user, EMBEDDING_VECTOR, document_name="study.pdf", guid=None)

    # Walk the mock chain step by step: filter -> annotate -> order_by.
    annotated_qs = mock_objects.filter.return_value.annotate.return_value
    distance_sorted_qs = annotated_qs.order_by.return_value
    distance_sorted_qs.filter.assert_called_once_with(name="study.pdf")
123+
124+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_empty_string_guid_falls_back_to_document_name(mock_objects):
    """An empty-string guid is ignored and the document_name filter is used.

    ``""`` is falsy, so it should not select the guid branch. This guards
    against callers passing ``guid=""`` from an unset form field.
    """
    auth_user = MagicMock(is_authenticated=True)

    build_query(auth_user, EMBEDDING_VECTOR, guid="", document_name="fallback.pdf")

    # Walk the mock chain step by step: filter -> annotate -> order_by.
    annotated_qs = mock_objects.filter.return_value.annotate.return_value
    distance_sorted_qs = annotated_qs.order_by.return_value
    distance_sorted_qs.filter.assert_called_once_with(name="fallback.pdf")
136+
137+ # Cover LIMIT slicing
138+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_respects_num_results(mock_objects):
    """A non-default num_results reaches the queryset slice unchanged.

    Slicing the queryset is what applies the result limit, so the test
    inspects the ``__getitem__`` call on the ordered queryset mock.
    """
    limit = 5
    auth_user = MagicMock(is_authenticated=True)

    build_query(auth_user, EMBEDDING_VECTOR, num_results=limit)

    # qs[:5] is dispatched as qs.__getitem__(slice(None, 5, None)), and
    # slice(None, 5) compares equal to that.
    annotated_qs = mock_objects.filter.return_value.annotate.return_value
    distance_sorted_qs = annotated_qs.order_by.return_value
    distance_sorted_qs.__getitem__.assert_called_once_with(slice(None, limit))
150+
@patch("api.services.embedding_services.Embeddings.objects")
def test_build_query_returns_unevaluated_queryset(mock_objects):
    """build_query returns the sliced queryset itself, not a materialised list.

    The return value must be the very object produced by the final
    ``__getitem__`` call on the ordered queryset mock.
    """
    auth_user = MagicMock(is_authenticated=True)

    returned = build_query(auth_user, EMBEDDING_VECTOR)

    annotated_qs = mock_objects.filter.return_value.annotate.return_value
    sliced_qs = annotated_qs.order_by.return_value.__getitem__.return_value
    assert not isinstance(returned, list)
    assert returned is sliced_qs
162+
163+
164+ # ---------------------------------------------------------------------------
165+ # evaluate_query tests
166+ # ---------------------------------------------------------------------------
167+
168+ # TODO: Add test for empty queryset — evaluate_query([]) should return [].
5169
6170def test_evaluate_query_maps_fields ():
171+ # Verify that each Embeddings model attribute is mapped to the correct
172+ # output dict key. Note the rename: obj.page_num -> result["page_number"].
7173 obj = MagicMock ()
8174 obj .name = "doc.pdf"
9175 obj .text = "some text"
@@ -27,6 +193,8 @@ def test_evaluate_query_maps_fields():
27193
28194
29195def test_evaluate_query_none_upload_file ():
196+ # When upload_file is None (e.g. the FK was deleted), file_id must be None
197+ # rather than raising an AttributeError on None.guid.
30198 obj = MagicMock ()
31199 obj .name = "doc.pdf"
32200 obj .text = "some text"
@@ -39,9 +207,26 @@ def test_evaluate_query_none_upload_file():
39207
40208 assert results [0 ]["file_id" ] is None
41209
210+ # ---------------------------------------------------------------------------
211+ # log_usage tests
212+ # ---------------------------------------------------------------------------
213+
214+ # TODO: Add test for empty results list — log_usage([]) hits the else branch and
215+ # should call SemanticSearchUsage.objects.create with num_results_returned=0
216+ # and max_distance=None, median_distance=None, min_distance=None.
217+
218+ # TODO: Add test for unauthenticated user — user.is_authenticated=False should
219+ # result in user=None being stored in the SemanticSearchUsage record.
220+
221+ # TODO: Add test for user=None — passing None directly as the user argument
222+ # should also store user=None (the expression `user if (user and
223+ # user.is_authenticated) else None` handles both cases, but only the
224+ # authenticated path is currently exercised).
42225
43226@patch ("api.services.embedding_services.SemanticSearchUsage.objects.create" )
44227def test_log_usage_computes_distance_stats (mock_create ):
228+ # Verify min, max, and median are computed correctly from the distance
229+ # values in the results list and forwarded to the DB record.
45230 results = [{"distance" : 1.0 }, {"distance" : 3.0 }, {"distance" : 2.0 }]
46231 user = MagicMock (is_authenticated = True )
47232
@@ -69,10 +254,12 @@ def test_log_usage_computes_distance_stats(mock_create):
69254 side_effect = Exception ("DB error" ),
70255)
71256def test_log_usage_swallows_exceptions (mock_create ):
257+ # log_usage must not propagate exceptions — a logging failure should never
258+ # interrupt the caller's search flow.
259+ # pytest fails the test if it catches unhandled Exception
72260 results = [{"distance" : 1.0 }]
73261 user = MagicMock (is_authenticated = True )
74262
75- # pytest fails the test if it catches unhandled Exception
76263 log_usage (
77264 results ,
78265 message_data = "test query" ,
@@ -83,3 +270,14 @@ def test_log_usage_swallows_exceptions(mock_create):
83270 encoding_time = 0.1 ,
84271 db_query_time = 0.2 ,
85272 )
273+
274+
275+ # ---------------------------------------------------------------------------
276+ # get_closest_embeddings tests
277+ # ---------------------------------------------------------------------------
278+
# TODO: Add smoke test for get_closest_embeddings verifying the wiring between
# its steps: encode → build_query → evaluate_query → log_usage.
# Patch TransformerModel.get_instance, build_query, evaluate_query, and
# log_usage. Assert that evaluate_query receives the queryset returned by
# build_query, and that the function returns evaluate_query's result.
0 commit comments