Skip to content

Commit c1c0084

Browse files
committed
Implements referenced documents in the /query endpoint response
1 parent 2cc494c commit c1c0084

3 files changed

Lines changed: 220 additions & 25 deletions

File tree

src/app/endpoints/query.py

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""Handler for REST API call to provide answer to query."""
22

3+
import ast
34
from datetime import datetime, UTC
45
import json
56
import logging
67
import os
78
from pathlib import Path
9+
import re
810
from typing import Annotated, Any
911

1012
from llama_stack_client import APIConnectionError
@@ -41,6 +43,8 @@
4143
router = APIRouter(tags=["query"])
4244
auth_dependency = get_auth_dependency()
4345

46+
METADATA_PATTERN = re.compile(r"\nMetadata: (\{.+})\n")
47+
4448
query_response: dict[int | str, dict[str, Any]] = {
4549
200: {
4650
"conversation_id": "123e4567-e89b-12d3-a456-426614174000",
@@ -189,7 +193,7 @@ async def query_endpoint_handler(
189193
user_conversation=user_conversation, query_request=query_request
190194
),
191195
)
192-
response, conversation_id = await retrieve_response(
196+
response, conversation_id, referenced_documents = await retrieve_response(
193197
client,
194198
llama_stack_model_id,
195199
query_request,
@@ -223,7 +227,11 @@ async def query_endpoint_handler(
223227
provider_id=provider_id,
224228
)
225229

226-
return QueryResponse(conversation_id=conversation_id, response=response)
230+
return QueryResponse(
231+
conversation_id=conversation_id,
232+
response=response,
233+
referenced_documents=referenced_documents
234+
)
227235

228236
# connection to Llama Stack server
229237
except APIConnectionError as e:
@@ -322,7 +330,7 @@ async def retrieve_response( # pylint: disable=too-many-locals
322330
query_request: QueryRequest,
323331
token: str,
324332
mcp_headers: dict[str, dict[str, str]] | None = None,
325-
) -> tuple[str, str]:
333+
) -> tuple[str, str, list[dict[str, str]]]:
326334
"""Retrieve response from LLMs and agents."""
327335
available_input_shields = [
328336
shield.identifier
@@ -402,15 +410,42 @@ async def retrieve_response( # pylint: disable=too-many-locals
402410
toolgroups=toolgroups,
403411
)
404412

405-
# Check for validation errors in the response
413+
# Collect metadata from tool responses to extract referenced documents
414+
metadata_map: dict[str, dict[str, Any]] = {}
406415
steps = getattr(response, "steps", [])
407416
for step in steps:
408417
if step.step_type == "shield_call" and step.violation:
409418
# Metric for LLM validation errors
410419
metrics.llm_calls_validation_errors_total.inc()
411-
break
420+
elif step.step_type == "tool_execution" and hasattr(step, "tool_responses"):
421+
for tool_response in step.tool_responses:
422+
if tool_response.tool_name == "knowledge_search" and tool_response.content:
423+
for text_content_item in tool_response.content:
424+
if hasattr(text_content_item, 'text'):
425+
for match in METADATA_PATTERN.findall(text_content_item.text):
426+
try:
427+
meta = ast.literal_eval(match)
428+
if "document_id" in meta:
429+
metadata_map[meta["document_id"]] = meta
430+
except Exception: # pylint: disable=broad-except
431+
logger.debug(
432+
"An exception was thrown in processing %s",
433+
match,
434+
)
435+
436+
# Extract referenced documents from metadata
437+
referenced_documents = [
438+
{
439+
"doc_url": v["docs_url"],
440+
"doc_title": v["title"],
441+
}
442+
for v in filter(
443+
lambda v: ("docs_url" in v) and ("title" in v),
444+
metadata_map.values(),
445+
)
446+
]
412447

413-
return str(response.output_message.content), conversation_id # type: ignore[union-attr]
448+
return str(response.output_message.content), conversation_id, referenced_documents # type: ignore[union-attr]
414449

415450

416451
def validate_attachments_metadata(attachments: list[Attachment]) -> None:

src/models/responses.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@ class ModelsResponse(BaseModel):
3636

3737
# TODO(lucasagomes): a lot of fields to add to QueryResponse. For now
3838
# we are keeping it simple. The missing fields are:
39-
# - referenced_documents: The optional URLs and titles for the documents used
40-
# to generate the response.
4139
# - truncated: Set to True if conversation history was truncated to be within context window.
4240
# - input_tokens: Number of tokens sent to LLM
4341
# - output_tokens: Number of tokens received from LLM
@@ -51,6 +49,8 @@ class QueryResponse(BaseModel):
5149
Attributes:
5250
conversation_id: The optional conversation ID (UUID).
5351
response: The response.
52+
referenced_documents: The optional URLs and titles for the documents used
53+
to generate the response.
5454
"""
5555

5656
conversation_id: Optional[str] = Field(
@@ -65,6 +65,19 @@ class QueryResponse(BaseModel):
6565
"Kubernetes is an open-source container orchestration system for automating ..."
6666
],
6767
)
68+
69+
referenced_documents: list[dict[str, str]] = Field(
70+
default_factory=list,
71+
description="List of documents referenced in generating the response",
72+
examples=[
73+
[
74+
{
75+
"doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html",
76+
"doc_title": "Operator Lifecycle Manager (OLM)"
77+
}
78+
]
79+
],
80+
)
6881

6982
# provides examples for /docs endpoint
7083
model_config = {
@@ -73,6 +86,12 @@ class QueryResponse(BaseModel):
7386
{
7487
"conversation_id": "123e4567-e89b-12d3-a456-426614174000",
7588
"response": "Operator Lifecycle Manager (OLM) helps users install...",
89+
"referenced_documents": [
90+
{
91+
"doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html",
92+
"doc_title": "Operator Lifecycle Manager (OLM)"
93+
}
94+
]
7695
}
7796
]
7897
}

0 commit comments

Comments
 (0)