Commit 03ede4a

committed: fix openai_services. add endpoint to use openAI to extract rules
1 parent 9231e22 commit 03ede4a

3 files changed: 134 additions & 62 deletions

server/api/services/openai_services.py

Lines changed: 60 additions & 45 deletions
@@ -6,49 +6,64 @@
 class openAIServices:
     @staticmethod
     def openAI(userMessage, prompt, model=None, temp=None, stream=False, raw_stream=False):
-        # Initialize the OpenAI client
-        try:
-            client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-
-            if model is None:
-                model = "gpt-4o-mini"
-            if temp is None:
-                temp = 0.2
-
-            if stream:
-
-                request_params = {
-                    "model": model,
-                    "temperature": temp,
-                    "messages": [
-                        {"role": "system", "content": prompt},
-                        {"role": "user", "content": userMessage}
-                    ],
-                    "stream": stream
-                }
-                response = client.chat.completions.create(**request_params)
-
-                for chunk in response:
-                    if raw_stream:
-                        # Return the entire chunk as JSON
-                        yield json.dumps(chunk.model_dump())
-                    else:
-                        # Extract only the content from the delta
-                        if chunk.choices and len(chunk.choices) > 0:
-                            delta = chunk.choices[0].delta
-                            if hasattr(delta, 'content') and delta.content:
-                                yield delta.content
+        if stream:
+            return openAIServices._openAI_streaming(userMessage, prompt, model, temp, raw_stream)
+        else:
+            return openAIServices._openAI_non_streaming(userMessage, prompt, model, temp)
+
+    @staticmethod
+    def _openAI_non_streaming(userMessage, prompt, model=None, temp=None):
+        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+        if model is None:
+            model = "gpt-4o-mini"
+        if temp is None:
+            temp = 0.2
+
+        request_params = {
+            "model": model,
+            "temperature": temp,
+            "messages": [
+                {"role": "system", "content": prompt},
+                {"role": "user", "content": userMessage}
+            ],
+        }
+
+        response = client.chat.completions.create(**request_params)
+        message_content = response.choices[0].message.content
+        print("OpenAI response content:", repr(message_content))
+
+        if not message_content:
+            raise ValueError("LLM returned empty content")
+
+        return message_content
+
+    @staticmethod
+    def _openAI_streaming(userMessage, prompt, model=None, temp=None, raw_stream=False):
+        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+        if model is None:
+            model = "gpt-4o-mini"
+        if temp is None:
+            temp = 0.2
+
+        request_params = {
+            "model": model,
+            "temperature": temp,
+            "messages": [
+                {"role": "system", "content": prompt},
+                {"role": "user", "content": userMessage}
+            ],
+            "stream": True
+        }
+
+        response = client.chat.completions.create(**request_params)
+
+        for chunk in response:
+            if raw_stream:
+                yield json.dumps(chunk.model_dump())
             else:
-                request_params = {
-                    "model": model,
-                    "temperature": temp,
-                    "messages": [
-                        {"role": "system", "content": prompt},
-                        {"role": "user", "content": userMessage}
-                    ],
-                }
-                response = client.chat.completions.create(**request_params)
-                return response.choices[0].message.content
-        except Exception as e:
-            print(f"Error: {e}")
-            raise
+                if chunk.choices and len(chunk.choices) > 0:
+                    delta = chunk.choices[0].delta
+                    if hasattr(delta, 'content') and delta.content:
+                        yield delta.content
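
The split into _openAI_non_streaming and _openAI_streaming fixes more than style: a Python function containing yield anywhere in its body is a generator function, so the old openAI returned a generator object even when stream=False, and its return response.choices[0].message.content never reached callers as a plain string. With the dispatch above, openAI is an ordinary function that returns either a string or a generator. A minimal caller sketch (the import path and prompt text are assumptions, not from this commit):

from api.services.openai_services import openAIServices  # path assumed from the repo layout

# Non-streaming: returns the message content as a string,
# or raises ValueError if the model returned empty content.
text = openAIServices.openAI(
    userMessage="List the mood stabilizers mentioned in this text.",
    prompt="You are a concise medical assistant.",
    stream=False,
)

# Streaming: returns the generator from _openAI_streaming, which yields
# content deltas (or whole chunks as JSON when raw_stream=True).
for piece in openAIServices.openAI(
    userMessage="List the mood stabilizers mentioned in this text.",
    prompt="You are a concise medical assistant.",
    stream=True,
):
    print(piece, end="", flush=True)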
Lines changed: 4 additions & 2 deletions

@@ -1,9 +1,11 @@
 from django.urls import path
-from .views import RuleExtractionAPIView
+from .views import RuleExtractionAPIView, RuleExtractionAPIOpenAIView


 urlpatterns = [

     path('v1/api/rule_extraction', RuleExtractionAPIView.as_view(),
-         name='rule_extraction')
+         name='rule_extraction'),
+    path('v1/api/rule_extraction_openai', RuleExtractionAPIOpenAIView.as_view(),
+         name='rule_extraction_openai')
 ]
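
With the second route registered, both endpoints are reachable by name. A quick sanity check, assuming these patterns are included at the project root with no prefix:

from django.urls import reverse

# Hypothetical check; passes only if this urls.py is mounted at the root.
assert reverse('rule_extraction') == '/v1/api/rule_extraction'
assert reverse('rule_extraction_openai') == '/v1/api/rule_extraction_openai'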

server/api/views/text_extraction/views.py

Lines changed: 70 additions & 15 deletions
@@ -1,5 +1,5 @@
 import os
-
+from ...services.openai_services import openAIServices
 from rest_framework.views import APIView
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
@@ -12,7 +12,7 @@


 # TODO: Add docstrings and type hints
-def anthropic_citations(client, content_chunks, user_prompt):
+def anthropic_citations(client, content_chunks, user_prompt):
     """
     """

@@ -31,7 +31,7 @@ def anthropic_citations(client, content_chunks, user_prompt):
                         },
                         "citations": {"enabled": True}
                     },
-
+
                     {
                         "type": "text",
                         "text": user_prompt
@@ -41,16 +41,17 @@ def anthropic_citations(client, content_chunks, user_prompt):
         ],
     )

-
     # Response Structure: https://docs.anthropic.com/en/docs/build-with-claude/citations#response-structure
-
+
     text = []
     cited_text = []
     for content in message.to_dict()['content']:
         text.append(content['text'])
         if 'citations' in content.keys():
-            text.append(" ".join([f"<{citation['start_block_index']} - {citation['end_block_index']}>" for citation in content['citations']]))
-            cited_text.append(" ".join([f"<{citation['start_block_index']} - {citation['end_block_index']}> {citation['cited_text']}" for citation in content['citations']]))
+            text.append(" ".join(
+                [f"<{citation['start_block_index']} - {citation['end_block_index']}>" for citation in content['citations']]))
+            cited_text.append(" ".join(
+                [f"<{citation['start_block_index']} - {citation['end_block_index']}> {citation['cited_text']}" for citation in content['citations']]))

     texts = " ".join(text)
     cited_texts = " ".join(cited_text)
@@ -66,22 +67,23 @@ class RuleExtractionAPIView(APIView):
     def get(self, request):
         try:

-            client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
-
+            client = anthropic.Anthropic(
+                api_key=os.getenv("ANTHROPIC_API_KEY"))
+
             user_prompt = """
             I'm creating a system to analyze medical research. It processes peer-reviewed papers to extract key details

             Act as a seasoned physician or medical professional who treat patients with bipolar disorder

-            Identify rules for medication inclusion or exclusion based on medical history or concerns
+            Identify rules for medication inclusion or exclusion based on medical history or concerns

             Return an output with the same structure as these examples:

-            The rule is history of suicide attempts. The type of rule is "INCLUDE". The reason is lithium is the
+            The rule is history of suicide attempts. The type of rule is "INCLUDE". The reason is lithium is the
             only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder.
             The medications for this rule are lithium.

-            The rule is weight gain concerns. The type of rule is "EXCLUDE". The reason is Seroquel, Risperdal, Abilify, and
+            The rule is weight gain concerns. The type of rule is "EXCLUDE". The reason is Seroquel, Risperdal, Abilify, and
             Zyprexa are known for causing weight gain. The medications for this rule are Quetiapine, Aripiprazole, Olanzapine, Risperidone
             }
             """
@@ -92,10 +94,63 @@ def get(self, request):

             chunks = [{"type": "text", "text": chunk.text} for chunk in query]

-            texts, cited_texts = anthropic_citations(client, chunks, user_prompt)
-
+            texts, cited_texts = anthropic_citations(
+                client, chunks, user_prompt)

             return Response({"texts": texts, "cited_texts": cited_texts}, status=status.HTTP_200_OK)

         except Exception as e:
-            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+
+
+# This is to use openai to extract the rules to save cost
+
+def openai_extraction(content_chunks, user_prompt):
+    """
+    Prepares the OpenAI input and returns the extracted text.
+    """
+
+    combined_text = "\n\n".join(chunk['text'] for chunk in content_chunks)
+
+    result = openAIServices.openAI(
+        userMessage=combined_text,
+        prompt=user_prompt,
+        model="gpt-4o-mini",
+        temp=0.0,
+        stream=False
+    )
+    return result
+
+
+@method_decorator(csrf_exempt, name='dispatch')
+class RuleExtractionAPIOpenAIView(APIView):
+    permission_classes = [IsAuthenticated]
+
+    def get(self, request):
+        try:
+            user_prompt = """
+            You're analyzing medical text from multiple sources. Each chunk is labeled [chunk-X].
+
+            Act as a seasoned physician or medical professional who treats patients with bipolar disorder.
+
+            Identify rules for medication inclusion or exclusion based on medical history or concerns.
+
+            Return each rule with this exact structure:
+            The rule is __. The type of rule is "__". The reason is __. The medications for this rule are __. Source: [chunk-X]
+
+            Only use chunks provided. If no rule is found in a chunk, skip it.
+            """
+
+            guid = request.query_params.get('guid')
+            query = Embeddings.objects.filter(upload_file__guid=guid)
+            chunks = [
+                {"type": "text", "text": f"[chunk-{i}] {chunk.text}"}
+                for i, chunk in enumerate(query)
+            ]
+
+            output_text = openai_extraction(chunks, user_prompt)
+
+            return Response({"text": output_text}, status=status.HTTP_200_OK)
+
+        except Exception as e:
+            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
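
The new view trades Anthropic's native citation support for cost, as the inline comment says: every chunk is concatenated into a single OpenAI request, and the [chunk-{i}] labels let the prompt ask the model to name its source chunk instead. A sketch of exercising the endpoint with DRF's test client (the user object and guid value are placeholders, not from this commit):

from rest_framework.test import APIClient

client = APIClient()                   # view requires IsAuthenticated
client.force_authenticate(user=user)   # `user` is an assumed existing account

resp = client.get('/v1/api/rule_extraction_openai', {'guid': 'some-upload-guid'})
assert resp.status_code == 200
# Response body: {"text": "The rule is __. ... Source: [chunk-X] ..."}
print(resp.json()['text'])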
