Skip to content

Commit c70861a

Browse files
committed
Redact sensitive feed data from logs
1 parent 51ce79a commit c70861a

8 files changed

Lines changed: 278 additions & 28 deletions

File tree

app/web/boot/setup.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class << self
1313
def call!
1414
validate_environment!
1515
configure_request_service!
16+
configure_runtime_logging!
1617
end
1718

1819
private
@@ -28,6 +29,13 @@ def validate_environment!
2829
def configure_request_service!
2930
nil
3031
end
32+
33+
# Assigns the shared application logger to Rack::Timeout::Logger.
# Does nothing when Rack::Timeout::Logger is not defined.
#
# @return [void]
def configure_runtime_logging!
  Rack::Timeout::Logger.logger = AppLogger.logger if defined?(Rack::Timeout::Logger)
end
3139
end
3240
end
3341
end

app/web/request/request_context_middleware.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def build_context(request)
5757
path = request.path_info.to_s
5858
RequestContext::Context.new(
5959
request_id: request_id_for(request),
60-
path: path,
60+
path: LogSanitizer.sanitize_path(path),
6161
http_method: request.request_method.to_s.upcase,
6262
route_group: route_group_for(path),
6363
actor: nil,

app/web/security/security_logger.rb

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# frozen_string_literal: true
22

3-
require 'logger'
43
require 'json'
54
require 'digest'
65
require 'time'
@@ -135,16 +134,7 @@ def log_cache_lifecycle(component, event, details = {})
135134
private
136135

137136
def create_logger
138-
Logger.new($stdout).tap do |log|
139-
log.formatter = proc do |severity, datetime, _progname, msg|
140-
"#{{
141-
timestamp: datetime.iso8601,
142-
level: severity,
143-
service: 'html2rss-web',
144-
**JSON.parse(msg, symbolize_names: true)
145-
}.to_json}\n"
146-
end
147-
end
137+
AppLogger.logger
148138
end
149139

150140
##
@@ -156,7 +146,7 @@ def log_event(event_type, data, severity: :warn)
156146
payload = {
157147
security_event: event_type,
158148
**context_data,
159-
**data
149+
**LogSanitizer.sanitize_details(data)
160150
}.to_json
161151

162152
logger.public_send(severity, payload)

app/web/telemetry/app_logger.rb

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# frozen_string_literal: true
2+
3+
require 'json'
4+
require 'logger'
5+
require 'time'
6+
require 'uri'
7+
8+
module Html2rss
  module Web
    ##
    # Shared structured logger for application and middleware runtime events.
    #
    # Each entry is written to $stdout as one JSON line: the base fields
    # (timestamp, level, service) merged with fields derived from the message.
    module AppLogger
      # One logfmt-style `key=value` pair. The value is either a double-quoted
      # string (without embedded quotes) or a run of non-whitespace characters.
      LOGFMT_PAIR = /([a-zA-Z0-9_.-]+)=("[^"]*"|\S+)/

      class << self
        # Returns the logger stored under Thread.current[:app_logger],
        # building it on first use.
        #
        # @return [Logger]
        def logger
          Thread.current[:app_logger] ||= build_logger
        end

        # Clears the stored logger so the next call to {.logger} builds a new one.
        #
        # @return [void]
        def reset_logger!
          Thread.current[:app_logger] = nil
        end

        private

        # Builds a $stdout logger whose formatter is {.format_entry}.
        #
        # @return [Logger]
        def build_logger
          Logger.new($stdout).tap do |log|
            log.formatter = method(:format_entry)
          end
        end

        # Renders one log entry as a JSON document followed by a newline.
        #
        # NOTE(review): fields derived from the message are merged last, so a
        # message key named timestamp, level or service replaces the base
        # field (e.g. a logfmt `service=` pair) — confirm this is intended.
        #
        # @param severity [String]
        # @param datetime [Time]
        # @param _progname [String, nil]
        # @param message [String]
        # @return [String]
        def format_entry(severity, datetime, _progname, message)
          "#{base_payload(severity, datetime).merge(normalize_message(message)).to_json}\n"
        end

        # Fields present on every entry.
        #
        # @param severity [String]
        # @param datetime [Time]
        # @return [Hash{Symbol=>Object}]
        def base_payload(severity, datetime)
          {
            timestamp: datetime.iso8601,
            level: severity,
            service: 'html2rss-web'
          }
        end

        # Turns a logged message into a hash of fields. Tries, in order: a JSON
        # object, logfmt pairs, and finally the plain text under :message.
        #
        # @param message [Object]
        # @return [Hash{Symbol=>Object}]
        def normalize_message(message)
          parsed_json(message) || parse_logfmt(message.to_s) || { message: message.to_s }
        end

        # Parses the message as JSON and returns it only when it is a JSON
        # object. Valid JSON scalars and arrays ("42", "[1,2]") are rejected
        # because the result is merged into a Hash by {.format_entry}.
        #
        # @param message [Object]
        # @return [Hash{Symbol=>Object}, nil]
        def parsed_json(message)
          parsed = JSON.parse(message.to_s, symbolize_names: true)
          parsed if parsed.is_a?(Hash)
        rescue JSON::ParserError, TypeError
          nil
        end

        # Extracts logfmt pairs from the message. When the message also
        # contains text outside the pairs, the complete original text is kept
        # under :message (unless a pair already uses that key) so nothing that
        # was logged gets dropped.
        #
        # @param message [String]
        # @return [Hash{Symbol=>Object}, nil] nil when the message has no pairs
        def parse_logfmt(message)
          pairs = message.scan(LOGFMT_PAIR)
          return nil if pairs.empty?

          fields = pairs.to_h { |key, raw_value| [key.to_sym, normalize_logfmt_value(raw_value)] }
          only_pairs = message.gsub(LOGFMT_PAIR, '').strip.empty?
          fields[:message] = message unless only_pairs || fields.key?(:message)
          fields
        end

        # Strips one surrounding double quote on each side and converts
        # boolean and numeric literals to their Ruby values.
        #
        # @param raw_value [String]
        # @return [String, Integer, Float, TrueClass, FalseClass]
        def normalize_logfmt_value(raw_value)
          value = raw_value.delete_prefix('"').delete_suffix('"')
          return true if value == 'true'
          return false if value == 'false'
          return value.to_i if value.match?(/\A-?\d+\z/)
          return value.to_f if value.match?(/\A-?\d+\.\d+\z/)

          value
        end
      end
    end
  end
end

app/web/telemetry/log_sanitizer.rb

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# frozen_string_literal: true
2+
3+
require 'digest'
4+
require 'uri'
5+
6+
module Html2rss
  module Web
    ##
    # Sanitizes request and detail payloads before structured logging.
    module LogSanitizer
      # Matches a feed route whose last segment is the token, optionally
      # followed by a .json/.xml/.rss extension. The token is matched lazily
      # and may contain dots, so any value in that position is redacted while
      # a trailing known extension is still captured separately.
      FEED_TOKEN_ROUTE = %r{\A(/api/v1/feeds/)([^/?]+?)(\.(?:json|xml|rss))?\z}

      class << self
        # Replaces the token segment of a feed route with "[REDACTED]",
        # keeping the route prefix and extension. Other paths are returned
        # unchanged.
        #
        # @param path [String, nil]
        # @return [String, nil] nil when path is nil
        def sanitize_path(path)
          return if path.nil?

          # The pattern is anchored at both ends, so at most one match exists.
          path.to_s.sub(FEED_TOKEN_ROUTE, '\1[REDACTED]\3')
        end

        # Returns a copy of the details in which every value stored under a
        # `url` key (String or Symbol, at any nesting depth) is replaced by
        # the structure built in {.sanitize_url}.
        #
        # @param details [Hash, nil]
        # @return [Hash] an empty hash when details is nil
        def sanitize_details(details)
          return {} if details.nil?

          details.each_with_object({}) do |(key, value), sanitized|
            sanitized[key] = sanitize_value(key, value)
          end
        end

        private

        # @param key [Object]
        # @param value [Object]
        # @return [Object]
        def sanitize_value(key, value)
          # Compare via to_s: keys that do not respond to to_sym (Integer, nil)
          # must not make logging raise.
          return sanitize_url(value) if key.to_s == 'url'
          return sanitize_details(value) if value.is_a?(Hash)
          return value.map { |entry| sanitize_value(key, entry) } if value.is_a?(Array)

          value
        end

        # Reduces a URL to its host, scheme and a short fingerprint. Values
        # whose string form is empty are returned unchanged; unparsable URLs
        # yield only the fingerprint.
        #
        # @param value [Object]
        # @return [Hash{Symbol=>Object}, Object]
        def sanitize_url(value)
          url = value.to_s
          return value if url.empty?

          uri = URI.parse(url)
          { host: uri.host, scheme: uri.scheme, hash: url_fingerprint(url) }.compact
        rescue URI::InvalidURIError
          { hash: url_fingerprint(url) }
        end

        # First 12 hex characters of the SHA-256 digest of the URL string.
        #
        # @param url [String]
        # @return [String]
        def url_fingerprint(url)
          Digest::SHA256.hexdigest(url)[0..11]
        end
      end
    end
  end
end

app/web/telemetry/observability.rb

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
# frozen_string_literal: true
22

3-
require 'json'
4-
require 'logger'
5-
require 'time'
6-
73
module Html2rss
84
module Web
95
##
@@ -27,16 +23,7 @@ def emit(event_name:, outcome:, details: {}, level: :info)
2723

2824
# @return [Logger]
2925
def logger
30-
Thread.current[:observability_logger] ||= Logger.new($stdout).tap do |log|
31-
log.formatter = proc do |severity, datetime, _progname, msg|
32-
"#{{
33-
timestamp: datetime.iso8601,
34-
level: severity,
35-
service: 'html2rss-web',
36-
**JSON.parse(msg, symbolize_names: true)
37-
}.to_json}\n"
38-
end
39-
end
26+
AppLogger.logger
4027
end
4128

4229
# @param error [StandardError]
@@ -54,7 +41,7 @@ def handle_emit_error(error, event_name, outcome)
5441
# @return [Hash{Symbol=>Object}]
5542
def build_payload(event_name, outcome, details)
5643
context = RequestContext.current_h
57-
base_payload(event_name, outcome, context).merge(details: details)
44+
base_payload(event_name, outcome, context).merge(details: LogSanitizer.sanitize_details(details))
5845
end
5946

6047
# @param event_name [String]
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# frozen_string_literal: true
2+
3+
require 'spec_helper'
4+
require 'stringio'
5+
6+
require_relative '../../../app/web/request/request_context'
7+
require_relative '../../../app/web/security/security_logger'
8+
require_relative '../../../app/web/telemetry/app_logger'
9+
require_relative '../../../app/web/telemetry/log_sanitizer'
10+
require_relative '../../../app/web/telemetry/observability'
11+
12+
RSpec.describe Html2rss::Web::LogSanitizer do
  # URL used by every example that logs a feed source.
  let(:feed_url) { 'https://news.ycombinator.com' }
  # Structure expected in place of feed_url after sanitizing.
  let(:sanitized_feed_url) do
    {
      host: 'news.ycombinator.com',
      scheme: 'https',
      hash: Digest::SHA256.hexdigest(feed_url)[0..11]
    }
  end
  # In-memory sink that captures everything the stubbed loggers write.
  let(:io) { StringIO.new }
  let(:logger) do
    Logger.new(io).tap { |log| log.formatter = Html2rss::Web::AppLogger.method(:format_entry) }
  end
  let(:context) do
    Html2rss::Web::RequestContext::Context.new(
      request_id: 'req-123',
      path: '/api/v1/feeds/[REDACTED]',
      http_method: 'GET',
      route_group: 'api_v1',
      actor: nil,
      strategy: 'faraday',
      started_at: '2026-03-21T00:00:00Z'
    )
  end

  before do
    Html2rss::Web::RequestContext.set!(context)
    Html2rss::Web::AppLogger.reset_logger!
    Html2rss::Web::SecurityLogger.reset_logger!
    # Point every logging entry point at the in-memory logger.
    [Html2rss::Web::AppLogger, Html2rss::Web::SecurityLogger, Html2rss::Web::Observability].each do |source|
      allow(source).to receive(:logger).and_return(logger)
    end
  end

  after { Html2rss::Web::RequestContext.clear! }

  # @param line [String] one captured log line
  # @return [Hash{Symbol=>Object}]
  def parse_entry(line)
    JSON.parse(line, symbolize_names: true)
  end

  it 'redacts feed tokens from token feed request paths' do
    expect(described_class.sanitize_path('/api/v1/feeds/token-value-123')).to eq('/api/v1/feeds/[REDACTED]')
    expect(described_class.sanitize_path('/api/v1/feeds/token-value-123.json')).to eq('/api/v1/feeds/[REDACTED].json')
  end

  it 'replaces logged urls with hashed host metadata' do
    expect(described_class.sanitize_details(url: feed_url)).to eq(url: sanitized_feed_url)
  end

  it 'sanitizes security logger token usage fields' do
    Html2rss::Web::SecurityLogger.log_token_usage('very-secret-token', feed_url, true)
    entry = parse_entry(io.string.lines.last)

    expect(entry.slice(:path, :url, :token_hash)).to eq(
      path: '/api/v1/feeds/[REDACTED]',
      url: sanitized_feed_url,
      token_hash: Digest::SHA256.hexdigest('very-secret-token')[0..7]
    )
  end

  it 'sanitizes observability details' do
    Html2rss::Web::Observability.emit(
      event_name: 'feed.render',
      outcome: 'success',
      details: { url: feed_url, strategy: 'faraday' }
    )

    first_entry = io.string.lines.map { |line| parse_entry(line) }.first

    expect(first_entry.dig(:details, :url)).to eq(sanitized_feed_url)
  end

  it 'formats rack-timeout logfmt as json' do
    logger.info('source=rack-timeout id=req-123 timeout=15000ms state=completed')

    entry = parse_entry(io.string.lines.last)
    expect(entry).to include(
      source: 'rack-timeout',
      id: 'req-123',
      timeout: '15000ms',
      state: 'completed'
    )
  end
end

spec/html2rss/web/request_context_middleware_spec.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
expect(response['X-Request-Id']).not_to be_empty
1818
end
1919

20+
# The wrapped app echoes the context path, so the body shows what the middleware stored.
it 'redacts feed tokens from request context paths' do
  echoed_path = Rack::MockRequest.new(redaction_app).get('/api/v1/feeds/sensitive-token-value.json').body
  expect(echoed_path).to eq('/api/v1/feeds/[REDACTED].json')
end
24+
2025
private
2126

2227
# @return [Html2rss::Web::RequestContextMiddleware]
@@ -27,4 +32,13 @@ def middleware_app
2732
end
2833
described_class.new(app)
2934
end
35+
36+
# Wraps an app that responds with the current request context's path as its body.
#
# @return [Html2rss::Web::RequestContextMiddleware]
def redaction_app
  echo_context_path = lambda do |_env|
    [200, { 'Content-Type' => 'text/plain' }, [Html2rss::Web::RequestContext.current.path]]
  end
  described_class.new(echo_context_path)
end
3044
end

0 commit comments

Comments
 (0)