Skip to content

Commit 9fe61e3

Browse files
committed
Merge master branch into main with detailed README
2 parents f3efa9e + f521595 commit 9fe61e3

38 files changed

+10381
-1
lines changed

.bolt/config.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"template": "bolt-vite-react-ts"
3+
}

.bolt/prompt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
For all designs I ask you to make, have them be beautiful, not cookie cutter. Make webpages that are fully featured and worthy for production.
2+
3+
By default, this template supports JSX syntax with Tailwind CSS classes, React hooks, and Lucide React for icons. Do not install other packages for UI themes, icons, etc unless absolutely necessary or I request them.
4+
5+
Use icons from lucide-react for logos.
6+
7+
Use stock photos from unsplash where appropriate, only valid URLs you know exist. Do not download the images, only link to them in image tags.
8+

.env

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2+
VITE_SUPABASE_ANON_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Imhrc29jcXBvYWd0aGR3YWFpaGpmIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDEzNTgxNDgsImV4cCI6MjA1NjkzNDE0OH0.vo2irqrtwjbt66mBca9QUJFXVvLmCecgj1_9CgNTxiQ
3+
VITE_SUPABASE_URL=https://hksocqpoagthdwaaihjf.supabase.co

.gitignore

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Logs
2+
logs
3+
*.log
4+
npm-debug.log*
5+
yarn-debug.log*
6+
yarn-error.log*
7+
pnpm-debug.log*
8+
lerna-debug.log*
9+
10+
node_modules
11+
dist
12+
dist-ssr
13+
*.local
14+
15+
# Editor directories and files
16+
.vscode/*
17+
!.vscode/extensions.json
18+
.idea
19+
.DS_Store
20+
*.suo
21+
*.ntvs*
22+
*.njsproj
23+
*.sln
24+
*.sw?

README.md

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,53 @@
1-
# SafalMudra
1+
# Financial Insights RAG Application
2+
3+
This application provides real-time financial insights using Retrieval Augmented Generation (RAG) with Pathway, FAISS, and Ollama.
4+
5+
## Prerequisites
6+
7+
1. Install Ollama and download the Mistral model:
8+
```bash
9+
# macOS/Linux
10+
curl https://ollama.ai/install.sh | sh
11+
# Then run:
12+
ollama pull mistral
13+
```
14+
15+
2. Install Python dependencies:
16+
```bash
17+
pip install -r requirements.txt
18+
```
19+
20+
3. Start the backend server:
21+
```bash
22+
cd backend
23+
python main.py
24+
```
25+
26+
4. Start the frontend development server:
27+
```bash
28+
npm run dev
29+
```
30+
31+
## Features
32+
33+
- Real-time financial data processing with Pathway
34+
- Vector similarity search using FAISS
35+
- Local LLM integration with Ollama (Mistral)
36+
- User authentication with Supabase
37+
- Query history tracking
38+
- Beautiful, responsive UI
39+
40+
## Architecture
41+
42+
- Frontend: React + TypeScript + Vite
43+
- Backend: FastAPI + Pathway
44+
- Vector Store: FAISS
45+
- LLM: Ollama (Mistral)
46+
- Authentication: Supabase
47+
- Real-time Processing: Pathway
48+
49+
## Development
50+
51+
1. The backend server runs on `http://localhost:8000`
52+
2. The frontend development server runs on `http://localhost:5173`
53+
3. Ollama serves the LLM on `http://localhost:11434`

backend/alert_manager.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from typing import List, Dict, Any
2+
import asyncio
3+
import json
4+
import websockets
5+
from datetime import datetime
6+
7+
class AlertManager:
    """Fan-out alert broadcaster.

    Keeps a list of connected WebSocket clients, pushes every alert to all
    of them as JSON, and records each alert in an in-memory history.
    """

    def __init__(self):
        # Currently-connected clients (websockets server connection objects).
        self.websocket_connections: List[Any] = []
        # Every alert ever sent, oldest first. Unbounded -- a long-running
        # process may want to cap this.
        self.alert_history: List[Dict[str, Any]] = []

    async def start_websocket_server(self):
        """Serve WebSocket clients on ws://localhost:8765 until cancelled.

        Clients are registered on connect and unregistered when their
        connection terminates; inbound messages are ignored.
        """
        async def handler(websocket):
            self.websocket_connections.append(websocket)
            try:
                async for _message in websocket:
                    pass  # Inbound messages are currently ignored.
            finally:
                self.websocket_connections.remove(websocket)

        async with websockets.serve(handler, "localhost", 8765):
            await asyncio.Future()  # run forever

    async def send_alert(self, alert_type: str, message: str, data: Dict[str, Any]):
        """Record an alert and broadcast it to every connected client.

        Args:
            alert_type: Machine-readable alert category.
            message: Human-readable description.
            data: JSON-serializable payload attached to the alert.
        """
        alert = {
            "type": alert_type,
            "message": message,
            "data": data,
            # Timezone-aware UTC; datetime.utcnow() is deprecated since 3.12.
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        self.alert_history.append(alert)

        # Serialize once, not once per client.
        payload = json.dumps(alert)

        # Send to all connected clients, noting any that have gone away.
        stale_connections = []
        for ws in self.websocket_connections:
            try:
                await ws.send(payload)
            except websockets.exceptions.ConnectionClosed:
                stale_connections.append(ws)

        # Clean up connections that closed mid-broadcast.
        for ws in stale_connections:
            self.websocket_connections.remove(ws)

    def get_alert_history(self) -> List[Dict[str, Any]]:
        """Return all alerts sent so far (oldest first)."""
        return self.alert_history

backend/data/sample.jsonl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"content": "The Federal Reserve maintains its commitment to a 2% inflation target over the longer run and considers this level consistent with its mandate of maximum employment and price stability.", "source": "Federal Reserve Statement", "timestamp": "2024-03-01T10:00:00Z"}
2+
{"content": "SEC regulations require public companies to file annual reports (Form 10-K) and quarterly reports (Form 10-Q) to provide regular updates on their financial position and operations.", "source": "SEC Guidelines", "timestamp": "2024-03-01T10:01:00Z"}
3+
{"content": "The Basel III framework requires banks to maintain a minimum total capital ratio of 8% of risk-weighted assets, with additional buffers for systemically important financial institutions.", "source": "Basel Committee Guidelines", "timestamp": "2024-03-01T10:02:00Z"}

backend/embeddings.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from sentence_transformers import SentenceTransformer
2+
import numpy as np
3+
import faiss
4+
import pickle
5+
from typing import List, Tuple
6+
import os
7+
8+
class EmbeddingStore:
    """In-memory vector store: sentence-transformer embeddings + FAISS L2 index.

    Texts and their source labels are kept in parallel Python lists whose
    positions line up with the FAISS vector ids.
    """

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)
        self.dimension = 384  # Embedding width of all-MiniLM-L6-v2.
        # Exact (brute-force) L2 index; no training step required.
        self.index = faiss.IndexFlatL2(self.dimension)
        self.texts: List[str] = []
        self.sources: List[str] = []

    def add_texts(self, texts: List[str], sources: List[str]) -> None:
        """Embed and index `texts`, recording a parallel `sources` label for each.

        Raises:
            ValueError: if the two lists differ in length.
        """
        if len(texts) != len(sources):
            raise ValueError("Number of texts and sources must match")
        if not texts:
            return  # Nothing to embed; skip the zero-row index add.

        embeddings = self.model.encode(texts)
        self.index.add(np.array(embeddings).astype('float32'))
        self.texts.extend(texts)
        self.sources.extend(sources)

    def similarity_search(self, query: str, k: int = 3) -> List[Tuple[str, str, float]]:
        """Return up to `k` (text, source, L2-distance) tuples nearest to `query`."""
        query_embedding = self.model.encode([query])
        distances, indices = self.index.search(
            np.array(query_embedding).astype('float32'), k
        )

        results = []
        for rank, idx in enumerate(indices[0]):
            # FAISS pads missing neighbours with id -1 when the index holds
            # fewer than k vectors; the previous `idx < len(...)` check let
            # -1 through and silently returned texts[-1]/sources[-1].
            if 0 <= idx < len(self.texts):
                results.append((
                    self.texts[idx],
                    self.sources[idx],
                    float(distances[0][rank])
                ))
        return results

    def save(self, directory: str) -> None:
        """Persist the FAISS index and text/source metadata under `directory`."""
        os.makedirs(directory, exist_ok=True)
        faiss.write_index(self.index, os.path.join(directory, "faiss_index"))
        with open(os.path.join(directory, "metadata.pkl"), "wb") as f:
            pickle.dump((self.texts, self.sources), f)

    def load(self, directory: str) -> None:
        """Restore state previously written by save().

        NOTE(review): pickle.load executes arbitrary code on malicious input;
        only load metadata files this application wrote itself.
        """
        self.index = faiss.read_index(os.path.join(directory, "faiss_index"))
        with open(os.path.join(directory, "metadata.pkl"), "rb") as f:
            self.texts, self.sources = pickle.load(f)

backend/llm.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import requests
2+
from typing import List, Dict, Any
3+
4+
class LLM:
    """Thin client for a local Ollama text-generation endpoint."""

    def __init__(self, model: str = "mistral"):
        self.model = model
        self.api_url = "http://localhost:11434/api/generate"

    async def generate_response(self, query: str, context: List[Dict[str, Any]]) -> str:
        """Generate an answer to `query` grounded in the retrieved `context`.

        Args:
            query: The user's question.
            context: Dicts with 'source' and 'text' keys, as produced by the
                retrieval layer.

        Returns:
            The model's answer text.

        Raises:
            Exception: if the Ollama endpoint returns a non-200 status.
        """
        # Format context for the prompt.
        context_str = "\n".join([
            f"Source ({item['source']}): {item['text']}"
            for item in context
        ])

        prompt = f"""You are a financial analysis AI assistant. Use the following context to answer the question.
If you cannot answer the question based on the context, say so.

Context:
{context_str}

Question: {query}

Answer:"""

        # requests is blocking: run it in a worker thread so the event loop
        # stays responsive, and bound the wait so a hung Ollama instance
        # cannot stall the request forever.
        response = await asyncio.to_thread(
            requests.post,
            self.api_url,
            json={
                "model": self.model,
                "prompt": prompt,
                "stream": False
            },
            timeout=120,
        )

        if response.status_code != 200:
            raise Exception("Failed to get response from LLM")

        return response.json()["response"]

backend/main.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import asyncio
import threading
from datetime import datetime, timezone
from typing import List, Optional, Dict, Any

import pathway as pw
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from alert_manager import AlertManager
from embeddings import EmbeddingStore
from llm import LLM
from metrics_extractor import MetricsExtractor
14+
15+
app = FastAPI()

# Configure CORS
# Only the Vite dev server origin (see README) may call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize components
# Module-level singletons shared by every request handler.
embedding_store = EmbeddingStore()   # FAISS vector store + embedding model
llm = LLM()                          # Ollama client (Mistral)
metrics_extractor = MetricsExtractor()
alert_manager = AlertManager()       # WebSocket alert broadcaster
31+
32+
class QueryRequest(BaseModel):
    # Request body for POST /query.
    query: str  # Natural-language question to answer.
    # Optional retrieval filters; accepted but not currently applied by /query.
    filters: Optional[dict] = None
35+
36+
class QueryResponse(BaseModel):
    # Response body for POST /query.
    answer: str  # LLM-generated answer text.
    context: List[str]  # Retrieved document texts used as grounding.
    sources: List[str]  # Source labels, parallel to `context`.
    timestamp: str  # ISO-8601 time the answer was produced.
    metrics: Optional[Dict[str, Any]] = None  # Extracted financial metrics, if any.
42+
43+
@app.post("/query")
async def query(request: QueryRequest) -> QueryResponse:
    """Answer a financial question via RAG.

    Retrieves the most similar documents from the FAISS store, asks the
    local LLM to answer grounded in them, and -- when the query hints at
    numbers -- extracts financial metrics and broadcasts an alert.

    Raises:
        HTTPException: 500 wrapping any internal failure.
    """
    try:
        # Get relevant documents from FAISS.
        results = embedding_store.similarity_search(request.query)

        # Shape the (text, source, distance) tuples for the LLM prompt.
        context = [
            {"text": text, "source": source}
            for text, source, _ in results
        ]

        # Generate response using LLM.
        answer = await llm.generate_response(request.query, context)

        # Extract financial metrics only when the query suggests them.
        metrics = None
        if any(keyword in request.query.lower() for keyword in ('metrics', 'financial', 'numbers')):
            metrics_data = {}
            for text, _, _ in results:
                extracted = metrics_extractor.extract_metrics(text)
                # NOTE(review): assumes extract_metrics returns an object
                # whose fields live in __dict__ -- confirm in MetricsExtractor.
                for field, value in extracted.__dict__.items():
                    if value is not None:
                        metrics_data[field] = value
            if metrics_data:
                metrics = metrics_data
                # Alert only when metrics were actually found; the previous
                # version broadcast even for an empty result set.
                await alert_manager.send_alert(
                    "metrics_found",
                    "New financial metrics detected in query",
                    metrics_data
                )

        return QueryResponse(
            answer=answer,
            context=[text for text, _, _ in results],
            sources=[source for _, source, _ in results],
            # Timezone-aware UTC; datetime.utcnow() is deprecated since 3.12.
            timestamp=datetime.now(timezone.utc).isoformat(),
            metrics=metrics
        )
    except HTTPException:
        raise  # Preserve deliberate HTTP errors instead of re-wrapping as 500.
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
86+
87+
@app.get("/alerts")
async def get_alerts():
    """Return the full in-memory alert history (oldest first)."""
    return alert_manager.get_alert_history()
90+
91+
# Pathway data processing pipeline
def setup_pathway_pipeline():
    """Stream JSONL documents from data/ into the vector store and alert on
    any financial metrics found in new documents.

    NOTE(review): `pw.Config.interactive()`, `table.select(process_document)`
    and `processed.run()` do not obviously match the documented Pathway API
    (streaming graphs are normally executed with `pw.run()`); verify against
    the installed Pathway version before relying on this pipeline.
    """
    with pw.Config.interactive():
        # Input table for financial data; watches data/*.jsonl in streaming mode.
        input_table = pw.io.fs.read(
            "data/*.jsonl",
            mode="streaming",
            format="json"
        )

        # Process and index new documents.
        def process_document(doc):
            # Embed and index the document text alongside its source label.
            embedding_store.add_texts(
                [doc["content"]],
                [doc["source"]]
            )

            # Extract and alert on new financial metrics.
            metrics = metrics_extractor.extract_metrics(doc["content"])
            if any(value is not None for value in metrics.__dict__.values()):
                # NOTE(review): asyncio.create_task() requires a running event
                # loop; if Pathway invokes this callback from a plain thread
                # this raises RuntimeError -- confirm the execution context.
                asyncio.create_task(
                    alert_manager.send_alert(
                        "new_document_metrics",
                        f"New financial metrics found in document from {doc['source']}",
                        metrics.__dict__
                    )
                )
            return doc

        processed = input_table.select(process_document)
        processed.run()
122+
123+
if __name__ == "__main__":
    # The original called asyncio.create_task() here, which raises
    # RuntimeError because no event loop is running at module scope (and
    # setup_pathway_pipeline is a plain function, not a coroutine, so passing
    # its return value to create_task would be a TypeError anyway). Run both
    # background services in daemon threads so they die with the process.

    # Start WebSocket server for alerts in its own event loop.
    threading.Thread(
        target=lambda: asyncio.run(alert_manager.start_websocket_server()),
        name="alert-websocket-server",
        daemon=True,
    ).start()

    # Start Pathway pipeline (synchronous, blocking) in its own thread.
    threading.Thread(
        target=setup_pathway_pipeline,
        name="pathway-pipeline",
        daemon=True,
    ).start()

    # Run FastAPI server (blocks until shutdown).
    uvicorn.run(app, host="0.0.0.0", port=8000)

0 commit comments

Comments
 (0)