Skip to content

Commit d8d77de

Browse files
authored
Sanitize HTTP error responses — no exception class/message leakage (#26)
* Sanitize HTTP error responses — no exception class/message leakage Three handlers interpolated `f"{type(e).__name__}: {e}"` into the JSON error body, exposing internal field names, file paths, and user values to any client. - api/sessions.py: GET /api/sessions/<p>/<id> and .../stats now return a stable generic message; full traceback goes to the server log via current_app.logger.exception. - api/projects.py: per-session error card drops the `error_detail` field; `error: True` already drives the dashboard's error state. - tests/test_error_propagation.py: 6 regression tests, including a source-level guard against re-introducing the offending pattern. Closes #25 * test: pin projects-card shape + fix docstring route (review on PR #26) - Docstring listed GET /api/projects but the leak was on GET /api/projects/<project>/sessions; corrected so the next reader doesn't grep the wrong route. - test_per_session_error_card_omits_error_detail was passing without exercising anything: parse_session is tolerant of malformed lines, so the malformed-jsonl input never triggered the except branch and the conditional `if/for` walked an empty list. Switched to monkeypatching parse_session to raise (mirrors the session-level tests above) and pinned the response shape — assert status == 200 and body is a list, assert at least one error card exists, and verify the leaked field name doesn't appear in the body.
1 parent 289c0da commit d8d77de

3 files changed

Lines changed: 248 additions & 18 deletions

File tree

api/projects.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
"""Project listing endpoints."""
22

3-
import traceback
4-
53
from flask import Blueprint, current_app, jsonify
64

75
from utils.session_path import get_claude_projects_dir, list_projects, list_sessions, safe_join
@@ -71,7 +69,11 @@ def get_project_sessions(project_name):
7169
"first_timestamp": meta["first_timestamp"],
7270
"last_timestamp": meta["last_timestamp"],
7371
})
74-
except Exception as e:
75-
print(f"[ERROR] Failed to parse {s['id']}: {type(e).__name__}: {e}\n{traceback.format_exc()}")
76-
result.append({**s, "title": "Error parsing session", "error": True, "error_detail": f"{type(e).__name__}: {e}"})
72+
except Exception:
73+
# Full detail (class, message, traceback) to the server log via
74+
# logger.exception. The per-session card carries only `error: True`
75+
# — the class-name+message string was a leak (issue #25). The
76+
# operator looks at the server log for triage.
77+
current_app.logger.exception("Failed to parse session %s", s["id"])
78+
result.append({**s, "title": "Error parsing session", "error": True})
7779
return jsonify(result)

api/sessions.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""Session detail and stats endpoints."""
22

33
import os
4-
import traceback
54

65
from flask import Blueprint, current_app, jsonify, abort
76

@@ -30,12 +29,14 @@ def get_session(project_name, session_id):
3029
if is_session_excluded(rules, session, project_name):
3130
return jsonify({"error": "Session not found"}), 404
3231
return jsonify(session)
33-
except Exception as e:
34-
tb = traceback.format_exc()
35-
print(f"[ERROR] Failed to parse session {session_id}: {e}\n{tb}")
36-
return jsonify({
37-
"error": f"Failed to parse session: {type(e).__name__}: {e}",
38-
}), 500
32+
except Exception:
33+
# Full traceback (class name, message, stack) goes to the server log
34+
# via logger.exception. The HTTP body returns a stable, generic
35+
# message — never the class name or `e` itself, which would leak
36+
# internal field names, file paths, and user values to any client
37+
# (issue #25).
38+
current_app.logger.exception("Failed to parse session %s", session_id)
39+
return jsonify({"error": "Failed to parse session"}), 500
3940

4041

4142
@sessions_bp.route("/api/sessions/<path:project_name>/<session_id>/stats")
@@ -53,9 +54,8 @@ def get_session_stats(project_name, session_id):
5354
session = parse_session(filepath)
5455
stats = compute_stats(session)
5556
return jsonify(stats)
56-
except Exception as e:
57-
tb = traceback.format_exc()
58-
print(f"[ERROR] Failed to compute stats for {session_id}: {e}\n{tb}")
59-
return jsonify({
60-
"error": f"Failed to compute stats: {type(e).__name__}: {e}",
61-
}), 500
57+
except Exception:
58+
# Same pattern as get_session above — full detail to the server log,
59+
# generic message in the HTTP body (issue #25).
60+
current_app.logger.exception("Failed to compute stats for %s", session_id)
61+
return jsonify({"error": "Failed to compute session stats"}), 500

tests/test_error_propagation.py

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
"""
2+
Regression tests for issue #25 — HTTP error responses must not leak
3+
exception class names or message internals.
4+
5+
Three endpoints previously interpolated `f"{type(e).__name__}: {e}"` into
6+
their JSON error body:
7+
8+
- GET /api/sessions/<project>/<id> (api/sessions.py)
9+
- GET /api/sessions/<project>/<id>/stats (api/sessions.py)
10+
- GET /api/projects/<project>/sessions (api/projects.py — per-session card error_detail)
11+
12+
This file exercises each via Flask test_client with a payload that triggers
13+
the failure path, asserts a 500 (or 200 for projects, since the per-session
14+
error is per-row), and verifies the response body contains no exception
15+
class names from a defensive blocklist.
16+
17+
Run:
18+
pytest tests/test_error_propagation.py -v
19+
"""
20+
21+
from __future__ import annotations
22+
23+
import json
24+
import sys
25+
from pathlib import Path
26+
27+
import pytest
28+
29+
REPO_ROOT = Path(__file__).resolve().parent.parent
30+
sys.path.insert(0, str(REPO_ROOT))
31+
32+
from flask import Flask # noqa: E402
33+
34+
from api.projects import projects_bp # noqa: E402
35+
from api.sessions import sessions_bp # noqa: E402
36+
37+
38+
# Defensive blocklist — any of these substrings appearing in a response body
39+
# would mean the leak regressed. Includes common Python builtin exception
40+
# class names plus internal-looking shapes.
41+
_LEAK_TOKENS = [
42+
"Exception",
43+
"Error",
44+
"KeyError",
45+
"ValueError",
46+
"JSONDecodeError",
47+
"OSError",
48+
"FileNotFoundError",
49+
"TypeError",
50+
"AttributeError",
51+
"Traceback",
52+
"<class",
53+
]
54+
55+
56+
def _assert_no_class_name_leak(body_text: str, allow_word_error: bool = True):
57+
"""Assert no exception class name appears in the response body.
58+
59+
`allow_word_error=True` lets the bare word "Error" pass (common in
60+
legitimate error messages like "Failed to ..."), but still blocks the
61+
`*Error` class-name suffixes which always carry a class-name shape.
62+
"""
63+
for tok in _LEAK_TOKENS:
64+
if allow_word_error and tok == "Error":
65+
continue
66+
assert tok not in body_text, (
67+
f"Response body contains exception-class token {tok!r}: {body_text!r}"
68+
)
69+
70+
71+
@pytest.fixture
72+
def app(tmp_path, monkeypatch):
73+
"""Minimal Flask app with the two blueprints under test."""
74+
app = Flask(__name__)
75+
app.config["TESTING"] = True
76+
app.config["CLAUDE_PROJECTS_DIR"] = str(tmp_path)
77+
app.register_blueprint(sessions_bp)
78+
app.register_blueprint(projects_bp)
79+
return app
80+
81+
82+
@pytest.fixture
83+
def client(app):
84+
return app.test_client()
85+
86+
87+
def _write_session(tmp_path, project: str, session_id: str, content: str):
88+
"""Write a session file (any content) under <tmp_path>/<project>/<id>.jsonl."""
89+
proj = tmp_path / project
90+
proj.mkdir(exist_ok=True)
91+
p = proj / f"{session_id}.jsonl"
92+
p.write_text(content, encoding="utf-8")
93+
return p
94+
95+
96+
# ---------------------------------------------------------------------------
97+
# /api/sessions/<project>/<id>
98+
# ---------------------------------------------------------------------------
99+
100+
class TestGetSessionErrorBody:
101+
102+
def test_500_on_parse_failure_does_not_leak_class_name(self, tmp_path, client, monkeypatch):
103+
# Force the parser to raise an exception with a class-name + message
104+
# that WOULD leak through the old f-string interpolation if the fix
105+
# regressed. (parse_session is normally tolerant — it swallows per-line
106+
# JSONDecodeError — so we monkeypatch to guarantee we hit the except.)
107+
_write_session(tmp_path, "proj", "abc", "{}")
108+
109+
def _boom(*args, **kwargs):
110+
raise KeyError("internal_secret_field_id")
111+
112+
monkeypatch.setattr("api.sessions.parse_session", _boom)
113+
114+
resp = client.get("/api/sessions/proj/abc")
115+
assert resp.status_code == 500
116+
body = resp.get_json()
117+
assert isinstance(body, dict)
118+
assert body.get("error") == "Failed to parse session"
119+
# The exception's args include "internal_secret_field_id" — must not
120+
# appear in the response body.
121+
assert "internal_secret_field_id" not in json.dumps(body)
122+
_assert_no_class_name_leak(json.dumps(body))
123+
124+
def test_404_on_missing_file_keeps_session_id_safe(self, tmp_path, client):
125+
# Session ID is part of the URL so it appears in the 404 message —
126+
# that's fine; what we're guarding is exception-class leakage, which
127+
# 404 doesn't go through.
128+
resp = client.get("/api/sessions/proj/nope-doesnt-exist")
129+
assert resp.status_code == 404
130+
body = resp.get_json()
131+
_assert_no_class_name_leak(json.dumps(body))
132+
133+
def test_400_on_path_traversal_attempt(self, client):
134+
# safe_join rejects this with ValueError; the 400 path returns a
135+
# generic "Invalid path" message and should not leak.
136+
resp = client.get("/api/sessions/..%2Fevil/abc")
137+
assert resp.status_code in (400, 404)
138+
body = resp.get_json()
139+
_assert_no_class_name_leak(json.dumps(body))
140+
141+
142+
# ---------------------------------------------------------------------------
143+
# /api/sessions/<project>/<id>/stats
144+
# ---------------------------------------------------------------------------
145+
146+
class TestGetSessionStatsErrorBody:
147+
148+
def test_500_on_parse_failure_does_not_leak_class_name(self, tmp_path, client, monkeypatch):
149+
_write_session(tmp_path, "proj", "abc", "{}")
150+
151+
def _boom(*args, **kwargs):
152+
raise ValueError("invalid literal: '/private/path/secret.json'")
153+
154+
monkeypatch.setattr("api.sessions.parse_session", _boom)
155+
156+
resp = client.get("/api/sessions/proj/abc/stats")
157+
assert resp.status_code == 500
158+
body = resp.get_json()
159+
assert body.get("error") == "Failed to compute session stats"
160+
# The exception value contains a fake-secret path — must not leak.
161+
assert "/private/path" not in json.dumps(body)
162+
_assert_no_class_name_leak(json.dumps(body))
163+
164+
165+
# ---------------------------------------------------------------------------
166+
# /api/projects (per-session card)
167+
# ---------------------------------------------------------------------------
168+
169+
class TestGetProjectsErrorCard:
170+
171+
def test_per_session_error_card_omits_error_detail(self, tmp_path, client, monkeypatch):
172+
# parse_session is tolerant of malformed lines, so to exercise the
173+
# except branch deterministically (the one that builds the error
174+
# card), monkeypatch it to raise — same pattern as the session-level
175+
# tests above.
176+
_write_session(tmp_path, "myproj", "deadbeef-aaaa-bbbb-cccc-000000000000", "{}")
177+
178+
def _boom(*args, **kwargs):
179+
raise KeyError("internal_secret_field_id")
180+
181+
# api/projects.py imports parse_session inside the handler body,
182+
# so patch the source module rather than the consumer.
183+
monkeypatch.setattr("utils.jsonl_parser.parse_session", _boom)
184+
185+
resp = client.get("/api/projects/myproj/sessions")
186+
# Pin the response shape so a future wrapper change (e.g. {"sessions": [...]})
187+
# doesn't silently turn this test green by skipping the per-row scan.
188+
assert resp.status_code == 200
189+
body = resp.get_json()
190+
assert isinstance(body, list), (
191+
f"Expected JSON array of session cards; got {type(body).__name__}"
192+
)
193+
_assert_no_class_name_leak(json.dumps(body))
194+
error_rows = [r for r in body if isinstance(r, dict) and r.get("error")]
195+
assert error_rows, "Expected at least one per-session error card from the forced parse failure"
196+
for row in error_rows:
197+
assert "error_detail" not in row, (
198+
"Per-session error card still includes error_detail (issue #25)"
199+
)
200+
# The exception's args include "internal_secret_field_id" — must not
201+
# appear anywhere in the response.
202+
assert "internal_secret_field_id" not in json.dumps(body)
203+
204+
205+
# ---------------------------------------------------------------------------
206+
# Source-level guard
207+
# ---------------------------------------------------------------------------
208+
209+
class TestNoExceptionInterpolationInSource:
210+
"""Static guard: any future PR that re-introduces the
211+
`f"...{type(e).__name__}: {e}..."` pattern in api/ fails this test."""
212+
213+
def test_api_files_dont_interpolate_exception_in_jsonify(self):
214+
api_dir = REPO_ROOT / "api"
215+
for py_file in api_dir.glob("*.py"):
216+
src = py_file.read_text(encoding="utf-8")
217+
# Look for the specific footgun: jsonify(...) with f-string that
218+
# contains both `type(e)` or `{e}` AND the word "error".
219+
offending_patterns = [
220+
"type(e).__name__", # the class-name expose
221+
"{e}\"", # bare {e} ending an f-string
222+
"{e},", # bare {e} in a dict-value f-string
223+
]
224+
for pat in offending_patterns:
225+
assert pat not in src, (
226+
f"{py_file.name} contains forbidden pattern {pat!r} "
227+
f"— see issue #25"
228+
)

0 commit comments

Comments
 (0)