From c70861aca85cf5f9a4cab5e649fcd4e38b3b3a21 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sat, 21 Mar 2026 15:47:40 +0100 Subject: [PATCH 1/7] Redact sensitive feed data from logs --- app/web/boot/setup.rb | 8 ++ app/web/request/request_context_middleware.rb | 2 +- app/web/security/security_logger.rb | 14 +-- app/web/telemetry/app_logger.rb | 92 +++++++++++++++++ app/web/telemetry/log_sanitizer.rb | 61 ++++++++++++ app/web/telemetry/observability.rb | 17 +--- spec/html2rss/web/log_sanitizer_spec.rb | 98 +++++++++++++++++++ .../web/request_context_middleware_spec.rb | 14 +++ 8 files changed, 278 insertions(+), 28 deletions(-) create mode 100644 app/web/telemetry/app_logger.rb create mode 100644 app/web/telemetry/log_sanitizer.rb create mode 100644 spec/html2rss/web/log_sanitizer_spec.rb diff --git a/app/web/boot/setup.rb b/app/web/boot/setup.rb index fd69faa8..edb5687d 100644 --- a/app/web/boot/setup.rb +++ b/app/web/boot/setup.rb @@ -13,6 +13,7 @@ class << self def call! validate_environment! configure_request_service! + configure_runtime_logging! end private @@ -28,6 +29,13 @@ def validate_environment! def configure_request_service! nil end + + # @return [void] + def configure_runtime_logging! + return unless defined?(Rack::Timeout::Logger) + + Rack::Timeout::Logger.logger = AppLogger.logger + end end end end diff --git a/app/web/request/request_context_middleware.rb b/app/web/request/request_context_middleware.rb index ad49ad75..745eecb8 100644 --- a/app/web/request/request_context_middleware.rb +++ b/app/web/request/request_context_middleware.rb @@ -57,7 +57,7 @@ def build_context(request) path = request.path_info.to_s RequestContext::Context.new( request_id: request_id_for(request), - path: path, + path: LogSanitizer.sanitize_path(path), http_method: request.request_method.to_s.upcase, route_group: route_group_for(path), actor: nil, diff --git a/app/web/security/security_logger.rb b/app/web/security/security_logger.rb index 1d15d495..c5ed4a7e 100644 --- a/app/web/security/security_logger.rb +++ b/app/web/security/security_logger.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require 'logger' require 'json' require 'digest' require 'time' @@ -135,16 +134,7 @@ def log_cache_lifecycle(component, event, details = {}) private def create_logger - Logger.new($stdout).tap do |log| - log.formatter = proc do |severity, datetime, _progname, msg| - "#{{ - timestamp: datetime.iso8601, - level: severity, - service: 'html2rss-web', - **JSON.parse(msg, symbolize_names: true) - }.to_json}\n" - end - end + AppLogger.logger end ## @@ -156,7 +146,7 @@ def log_event(event_type, data, severity: :warn) payload = { security_event: event_type, **context_data, - **data + **LogSanitizer.sanitize_details(data) }.to_json logger.public_send(severity, payload) diff --git a/app/web/telemetry/app_logger.rb b/app/web/telemetry/app_logger.rb new file mode 100644 index 00000000..f267fb5b --- /dev/null +++ b/app/web/telemetry/app_logger.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require 'json' +require 'logger' +require 'time' +require 'uri' + +module Html2rss + module Web + ## + # Shared structured logger for application and middleware runtime events. + module AppLogger + class << self + # @return [Logger] + def logger + Thread.current[:app_logger] ||= build_logger + end + + # @return [void] + def reset_logger! + Thread.current[:app_logger] = nil + end + + private + + # @return [Logger] + def build_logger + Logger.new($stdout).tap do |log| + log.formatter = method(:format_entry) + end + end + + # @param severity [String] + # @param datetime [Time] + # @param _progname [String, nil] + # @param message [String] + # @return [String] + def format_entry(severity, datetime, _progname, message) + "#{base_payload(severity, datetime).merge(normalize_message(message)).to_json}\n" + end + + # @param severity [String] + # @param datetime [Time] + # @return [Hash{Symbol=>Object}] + def base_payload(severity, datetime) + { + timestamp: datetime.iso8601, + level: severity, + service: 'html2rss-web' + } + end + + # @param message [Object] + # @return [Hash{Symbol=>Object}] + def normalize_message(message) + parsed_json(message) || parse_logfmt(message.to_s) || { message: message.to_s } + end + + # @param message [Object] + # @return [Hash{Symbol=>Object}, nil] + def parsed_json(message) + JSON.parse(message.to_s, symbolize_names: true) + rescue JSON::ParserError, TypeError + nil + end + + # @param message [String] + # @return [Hash{Symbol=>Object}, nil] + def parse_logfmt(message) + pairs = message.scan(/([a-zA-Z0-9_.-]+)=("[^"]*"|\S+)/) + return nil if pairs.empty? + + pairs.to_h do |key, raw_value| + [key.to_sym, normalize_logfmt_value(raw_value)] + end + end + + # @param raw_value [String] + # @return [String, Integer, Float, TrueClass, FalseClass] + def normalize_logfmt_value(raw_value) + value = raw_value.delete_prefix('"').delete_suffix('"') + return true if value == 'true' + return false if value == 'false' + return value.to_i if value.match?(/\A-?\d+\z/) + return value.to_f if value.match?(/\A-?\d+\.\d+\z/) + + value + end + end + end + end +end diff --git a/app/web/telemetry/log_sanitizer.rb b/app/web/telemetry/log_sanitizer.rb new file mode 100644 index 00000000..67c25b75 --- /dev/null +++ b/app/web/telemetry/log_sanitizer.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require 'digest' +require 'uri' + +module Html2rss + module Web + ## + # Sanitizes request and detail payloads before structured logging. + module LogSanitizer + FEED_TOKEN_ROUTE = %r{\A(/api/v1/feeds/)([^/.?]+)(\.(?:json|xml|rss))?\z} + + class << self + # @param path [String, nil] + # @return [String, nil] + def sanitize_path(path) + return if path.nil? + + path.to_s.gsub(FEED_TOKEN_ROUTE, '\1[REDACTED]\3') + end + + # @param details [Hash] + # @return [Hash] + def sanitize_details(details) + details.each_with_object({}) do |(key, value), sanitized| + sanitized[key] = sanitize_value(key, value) + end + end + + private + + # @param key [Object] + # @param value [Object] + # @return [Object] + def sanitize_value(key, value) + return sanitize_url(value) if key.to_sym == :url + return sanitize_details(value) if value.is_a?(Hash) + return value.map { |entry| sanitize_value(key, entry) } if value.is_a?(Array) + + value + end + + # @param value [Object] + # @return [Hash{Symbol=>Object}, Object] + def sanitize_url(value) + url = value.to_s + return value if url.empty? + + uri = URI.parse(url) + { + host: uri.host, + scheme: uri.scheme, + hash: Digest::SHA256.hexdigest(url)[0..11] + }.compact + rescue URI::InvalidURIError + { hash: Digest::SHA256.hexdigest(url)[0..11] } + end + end + end + end +end diff --git a/app/web/telemetry/observability.rb b/app/web/telemetry/observability.rb index 496a0c9b..6400db8a 100644 --- a/app/web/telemetry/observability.rb +++ b/app/web/telemetry/observability.rb @@ -1,9 +1,5 @@ # frozen_string_literal: true -require 'json' -require 'logger' -require 'time' - module Html2rss module Web ## @@ -27,16 +23,7 @@ def emit(event_name:, outcome:, details: {}, level: :info) # @return [Logger] def logger - Thread.current[:observability_logger] ||= Logger.new($stdout).tap do |log| - log.formatter = proc do |severity, datetime, _progname, msg| - "#{{ - timestamp: datetime.iso8601, - level: severity, - service: 'html2rss-web', - **JSON.parse(msg, symbolize_names: true) - }.to_json}\n" - end - end + AppLogger.logger end # @param error [StandardError] @@ -54,7 +41,7 @@ def handle_emit_error(error, event_name, outcome) # @return [Hash{Symbol=>Object}] def build_payload(event_name, outcome, details) context = RequestContext.current_h - base_payload(event_name, outcome, context).merge(details: details) + base_payload(event_name, outcome, context).merge(details: LogSanitizer.sanitize_details(details)) end # @param event_name [String] diff --git a/spec/html2rss/web/log_sanitizer_spec.rb b/spec/html2rss/web/log_sanitizer_spec.rb new file mode 100644 index 00000000..3483a426 --- /dev/null +++ b/spec/html2rss/web/log_sanitizer_spec.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'stringio' + +require_relative '../../../app/web/request/request_context' +require_relative '../../../app/web/security/security_logger' +require_relative '../../../app/web/telemetry/app_logger' +require_relative '../../../app/web/telemetry/log_sanitizer' +require_relative '../../../app/web/telemetry/observability' + +RSpec.describe Html2rss::Web::LogSanitizer do + let(:io) { StringIO.new } + let(:logger) { Logger.new(io).tap { |log| log.formatter = Html2rss::Web::AppLogger.send(:method, :format_entry) } } + let(:context) do + Html2rss::Web::RequestContext::Context.new( + request_id: 'req-123', + path: '/api/v1/feeds/[REDACTED]', + http_method: 'GET', + route_group: 'api_v1', + actor: nil, + strategy: 'faraday', + started_at: '2026-03-21T00:00:00Z' + ) + end + + before do + Html2rss::Web::RequestContext.set!(context) + Html2rss::Web::AppLogger.reset_logger! + Html2rss::Web::SecurityLogger.reset_logger! + allow(Html2rss::Web::AppLogger).to receive(:logger).and_return(logger) + allow(Html2rss::Web::SecurityLogger).to receive(:logger).and_return(logger) + allow(Html2rss::Web::Observability).to receive(:logger).and_return(logger) + end + + after do + Html2rss::Web::RequestContext.clear! + end + + it 'redacts feed tokens from token feed request paths' do + expect(described_class.sanitize_path('/api/v1/feeds/token-value-123')).to eq('/api/v1/feeds/[REDACTED]') + expect(described_class.sanitize_path('/api/v1/feeds/token-value-123.json')).to eq('/api/v1/feeds/[REDACTED].json') + end + + it 'replaces logged urls with hashed host metadata' do + expected_url = { + host: 'news.ycombinator.com', + scheme: 'https', + hash: Digest::SHA256.hexdigest('https://news.ycombinator.com')[0..11] + } + + expect(described_class.sanitize_details(url: 'https://news.ycombinator.com')).to eq(url: expected_url) + end + + it 'sanitizes security logger token usage fields' do + Html2rss::Web::SecurityLogger.log_token_usage('very-secret-token', 'https://news.ycombinator.com', true) + payload = JSON.parse(io.string.lines.last, symbolize_names: true) + + expect(payload.slice(:path, :url, :token_hash)).to eq( + path: '/api/v1/feeds/[REDACTED]', + url: { + host: 'news.ycombinator.com', + scheme: 'https', + hash: Digest::SHA256.hexdigest('https://news.ycombinator.com')[0..11] + }, + token_hash: Digest::SHA256.hexdigest('very-secret-token')[0..7] + ) + end + + it 'sanitizes observability details' do + Html2rss::Web::Observability.emit( + event_name: 'feed.render', + outcome: 'success', + details: { url: 'https://news.ycombinator.com', strategy: 'faraday' } + ) + + lines = io.string.lines.map { |line| JSON.parse(line, symbolize_names: true) } + observability_payload = lines.first + + expect(observability_payload.dig(:details, :url)).to eq( + host: 'news.ycombinator.com', + scheme: 'https', + hash: Digest::SHA256.hexdigest('https://news.ycombinator.com')[0..11] + ) + end + + it 'formats rack-timeout logfmt as json' do + logger.info('source=rack-timeout id=req-123 timeout=15000ms state=completed') + + payload = JSON.parse(io.string.lines.last, symbolize_names: true) + expect(payload).to include( + source: 'rack-timeout', + id: 'req-123', + timeout: '15000ms', + state: 'completed' + ) + end +end diff --git a/spec/html2rss/web/request_context_middleware_spec.rb b/spec/html2rss/web/request_context_middleware_spec.rb index 6aae8308..f0e55c73 100644 --- a/spec/html2rss/web/request_context_middleware_spec.rb +++ b/spec/html2rss/web/request_context_middleware_spec.rb @@ -17,6 +17,11 @@ expect(response['X-Request-Id']).not_to be_empty end + it 'redacts feed tokens from request context paths' do + response = Rack::MockRequest.new(redaction_app).get('/api/v1/feeds/sensitive-token-value.json') + expect(response.body).to eq('/api/v1/feeds/[REDACTED].json') + end + private # @return [Html2rss::Web::RequestContextMiddleware] @@ -27,4 +32,13 @@ def middleware_app end described_class.new(app) end + + # @return [Html2rss::Web::RequestContextMiddleware] + def redaction_app + app = lambda do |_env| + context = Html2rss::Web::RequestContext.current + [200, { 'Content-Type' => 'text/plain' }, [context.path]] + end + described_class.new(app) + end end From 1f316d2f04f8ff43713e7529cefe600ce17e85ef Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sat, 21 Mar 2026 15:53:13 +0100 Subject: [PATCH 2/7] Consolidate structured log emission --- app/web/security/security_logger.rb | 29 +++++++---------------- app/web/telemetry/log_event.rb | 31 +++++++++++++++++++++++++ app/web/telemetry/observability.rb | 9 ++----- spec/html2rss/web/log_sanitizer_spec.rb | 3 +-- 4 files changed, 43 insertions(+), 29 deletions(-) create mode 100644 app/web/telemetry/log_event.rb diff --git a/app/web/security/security_logger.rb b/app/web/security/security_logger.rb index c5ed4a7e..639c1da3 100644 --- a/app/web/security/security_logger.rb +++ b/app/web/security/security_logger.rb @@ -10,16 +10,10 @@ module Web # Provides structured logging for security events to stdout module SecurityLogger class << self - # Initialize logger to stdout with structured JSON output - # @return [Logger] - def logger - Thread.current[:security_logger] ||= create_logger - end - - # Reset logger (for testing) + # Reset shared logger state for tests. # @return [void] def reset_logger! - Thread.current[:security_logger] = nil + AppLogger.reset_logger! end ## @@ -133,23 +127,18 @@ def log_cache_lifecycle(component, event, details = {}) private - def create_logger - AppLogger.logger - end - ## # Log a security event # @param event_type [String] type of security event # @param data [Hash] event data def log_event(event_type, data, severity: :warn) - context_data = RequestContext.current_h - payload = { - security_event: event_type, - **context_data, - **LogSanitizer.sanitize_details(data) - }.to_json - - logger.public_send(severity, payload) + LogEvent.emit( + level: severity, + payload: { + security_event: event_type, + **data + } + ) rescue StandardError => error handle_logging_error(error, event_type, data) end diff --git a/app/web/telemetry/log_event.rb b/app/web/telemetry/log_event.rb new file mode 100644 index 00000000..0b85a561 --- /dev/null +++ b/app/web/telemetry/log_event.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Html2rss + module Web + ## + # Shared structured log emitter for request-scoped application events. + module LogEvent + class << self + # @param payload [Hash{Symbol=>Object}] + # @param level [Symbol] + # @return [void] + def emit(payload:, level: :info) + logger.public_send(level, build_payload(payload).to_json) + end + + private + + # @return [Logger] + def logger + AppLogger.logger + end + + # @param payload [Hash{Symbol=>Object}] + # @return [Hash{Symbol=>Object}] + def build_payload(payload) + RequestContext.current_h.merge(LogSanitizer.sanitize_details(payload)) + end + end + end + end +end diff --git a/app/web/telemetry/observability.rb b/app/web/telemetry/observability.rb index 6400db8a..73c38bc6 100644 --- a/app/web/telemetry/observability.rb +++ b/app/web/telemetry/observability.rb @@ -14,18 +14,13 @@ class << self # @param level [Symbol] # @return [void] def emit(event_name:, outcome:, details: {}, level: :info) - logger.public_send(level, build_payload(event_name, outcome, details).to_json) + LogEvent.emit(payload: build_payload(event_name, outcome, details), level: level) rescue StandardError => error handle_emit_error(error, event_name, outcome) end private - # @return [Logger] - def logger - AppLogger.logger - end - # @param error [StandardError] # @param event_name [String] # @param outcome [String] @@ -41,7 +36,7 @@ def handle_emit_error(error, event_name, outcome) # @return [Hash{Symbol=>Object}] def build_payload(event_name, outcome, details) context = RequestContext.current_h - base_payload(event_name, outcome, context).merge(details: LogSanitizer.sanitize_details(details)) + base_payload(event_name, outcome, context).merge(details: details) end # @param event_name [String] diff --git a/spec/html2rss/web/log_sanitizer_spec.rb b/spec/html2rss/web/log_sanitizer_spec.rb index 3483a426..1d743684 100644 --- a/spec/html2rss/web/log_sanitizer_spec.rb +++ b/spec/html2rss/web/log_sanitizer_spec.rb @@ -6,6 +6,7 @@ require_relative '../../../app/web/request/request_context' require_relative '../../../app/web/security/security_logger' require_relative '../../../app/web/telemetry/app_logger' +require_relative '../../../app/web/telemetry/log_event' require_relative '../../../app/web/telemetry/log_sanitizer' require_relative '../../../app/web/telemetry/observability' @@ -29,8 +30,6 @@ Html2rss::Web::AppLogger.reset_logger! Html2rss::Web::SecurityLogger.reset_logger! allow(Html2rss::Web::AppLogger).to receive(:logger).and_return(logger) - allow(Html2rss::Web::SecurityLogger).to receive(:logger).and_return(logger) - allow(Html2rss::Web::Observability).to receive(:logger).and_return(logger) end after do From 77b3358a4b86090e8655f72aea4120259b231017 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sat, 21 Mar 2026 16:23:04 +0100 Subject: [PATCH 3/7] Fix log sanitizer loading and cleanup --- app/web/request/request_context_middleware.rb | 1 + .../{telemetry => security}/log_sanitizer.rb | 30 ++++++++++++++----- app/web/security/security_logger.rb | 1 - app/web/telemetry/app_logger.rb | 1 - spec/html2rss/web/log_sanitizer_spec.rb | 13 ++++++-- .../web/request_context_middleware_spec.rb | 1 + 6 files changed, 35 insertions(+), 12 deletions(-) rename app/web/{telemetry => security}/log_sanitizer.rb (60%) diff --git a/app/web/request/request_context_middleware.rb b/app/web/request/request_context_middleware.rb index 745eecb8..61a18cf3 100644 --- a/app/web/request/request_context_middleware.rb +++ b/app/web/request/request_context_middleware.rb @@ -3,6 +3,7 @@ require 'rack/request' require 'securerandom' require 'time' +require_relative '../security/log_sanitizer' module Html2rss module Web diff --git a/app/web/telemetry/log_sanitizer.rb b/app/web/security/log_sanitizer.rb similarity index 60% rename from app/web/telemetry/log_sanitizer.rb rename to app/web/security/log_sanitizer.rb index 67c25b75..4881c8a7 100644 --- a/app/web/telemetry/log_sanitizer.rb +++ b/app/web/security/log_sanitizer.rb @@ -1,14 +1,14 @@ # frozen_string_literal: true require 'digest' -require 'uri' +require 'html2rss/url' module Html2rss module Web ## - # Sanitizes request and detail payloads before structured logging. + # Sanitizes request paths and log payloads before they are emitted. module LogSanitizer - FEED_TOKEN_ROUTE = %r{\A(/api/v1/feeds/)([^/.?]+)(\.(?:json|xml|rss))?\z} + FEED_TOKEN_ROUTE = %r{\A(/api/v1/feeds/)([^/?]+)\z} class << self # @param path [String, nil] @@ -16,7 +16,11 @@ class << self def sanitize_path(path) return if path.nil? - path.to_s.gsub(FEED_TOKEN_ROUTE, '\1[REDACTED]\3') + path_string = path.to_s + suffix = feed_suffix(path_string) + token_path = suffix ? path_string.delete_suffix(suffix) : path_string + + token_path.gsub(FEED_TOKEN_ROUTE, "\\1[REDACTED]#{suffix}") end # @param details [Hash] @@ -29,6 +33,16 @@ def sanitize_details(details) private + # @param path [String] + # @return [String, nil] + def feed_suffix(path) + return '.json' if path.end_with?('.json') + return '.xml' if path.end_with?('.xml') + return '.rss' if path.end_with?('.rss') + + nil + end + # @param key [Object] # @param value [Object] # @return [Object] @@ -46,13 +60,13 @@ def sanitize_url(value) url = value.to_s return value if url.empty? - uri = URI.parse(url) + normalized_url = Html2rss::Url.for_channel(url) { - host: uri.host, - scheme: uri.scheme, + host: normalized_url.host, + scheme: normalized_url.scheme, hash: Digest::SHA256.hexdigest(url)[0..11] }.compact - rescue URI::InvalidURIError + rescue StandardError { hash: Digest::SHA256.hexdigest(url)[0..11] } end end diff --git a/app/web/security/security_logger.rb b/app/web/security/security_logger.rb index 639c1da3..d4e34332 100644 --- a/app/web/security/security_logger.rb +++ b/app/web/security/security_logger.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require 'json' require 'digest' require 'time' module Html2rss diff --git a/app/web/telemetry/app_logger.rb b/app/web/telemetry/app_logger.rb index f267fb5b..ae4ea68d 100644 --- a/app/web/telemetry/app_logger.rb +++ b/app/web/telemetry/app_logger.rb @@ -3,7 +3,6 @@ require 'json' require 'logger' require 'time' -require 'uri' module Html2rss module Web diff --git a/spec/html2rss/web/log_sanitizer_spec.rb b/spec/html2rss/web/log_sanitizer_spec.rb index 1d743684..fbe1562f 100644 --- a/spec/html2rss/web/log_sanitizer_spec.rb +++ b/spec/html2rss/web/log_sanitizer_spec.rb @@ -7,7 +7,7 @@ require_relative '../../../app/web/security/security_logger' require_relative '../../../app/web/telemetry/app_logger' require_relative '../../../app/web/telemetry/log_event' -require_relative '../../../app/web/telemetry/log_sanitizer' +require_relative '../../../app/web/security/log_sanitizer' require_relative '../../../app/web/telemetry/observability' RSpec.describe Html2rss::Web::LogSanitizer do @@ -36,9 +36,12 @@ Html2rss::Web::RequestContext.clear! end - it 'redacts feed tokens from token feed request paths' do + it 'redacts feed tokens from token feed request paths', :aggregate_failures do expect(described_class.sanitize_path('/api/v1/feeds/token-value-123')).to eq('/api/v1/feeds/[REDACTED]') expect(described_class.sanitize_path('/api/v1/feeds/token-value-123.json')).to eq('/api/v1/feeds/[REDACTED].json') + expect( + described_class.sanitize_path('/api/v1/feeds/eyJwIjoiYS5iLmMifQ==.xml') + ).to eq('/api/v1/feeds/[REDACTED].xml') end it 'replaces logged urls with hashed host metadata' do @@ -51,6 +54,12 @@ expect(described_class.sanitize_details(url: 'https://news.ycombinator.com')).to eq(url: expected_url) end + it 'falls back to a hash for malformed urls' do + expect(described_class.sanitize_details(url: '://bad url')).to eq( + url: { hash: Digest::SHA256.hexdigest('://bad url')[0..11] } + ) + end + it 'sanitizes security logger token usage fields' do Html2rss::Web::SecurityLogger.log_token_usage('very-secret-token', 'https://news.ycombinator.com', true) payload = JSON.parse(io.string.lines.last, symbolize_names: true) diff --git a/spec/html2rss/web/request_context_middleware_spec.rb b/spec/html2rss/web/request_context_middleware_spec.rb index f0e55c73..0b671407 100644 --- a/spec/html2rss/web/request_context_middleware_spec.rb +++ b/spec/html2rss/web/request_context_middleware_spec.rb @@ -4,6 +4,7 @@ require 'rack/mock' require_relative '../../../app/web/request/request_context' +require_relative '../../../app/web/security/log_sanitizer' require_relative '../../../app/web/request/request_context_middleware' RSpec.describe Html2rss::Web::RequestContextMiddleware do From 40144e208db524186d40f545ad5f63f8bf8c7793 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sat, 21 Mar 2026 16:30:48 +0100 Subject: [PATCH 4/7] Tighten log sanitization coverage --- app/web/security/log_sanitizer.rb | 9 ++++- spec/html2rss/web/boot/setup_spec.rb | 23 ++++++++--- spec/html2rss/web/log_sanitizer_spec.rb | 51 +++++++++++++++++++++++-- 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/app/web/security/log_sanitizer.rb b/app/web/security/log_sanitizer.rb index 4881c8a7..3b79769e 100644 --- a/app/web/security/log_sanitizer.rb +++ b/app/web/security/log_sanitizer.rb @@ -47,13 +47,20 @@ def feed_suffix(path) # @param value [Object] # @return [Object] def sanitize_value(key, value) - return sanitize_url(value) if key.to_sym == :url return sanitize_details(value) if value.is_a?(Hash) return value.map { |entry| sanitize_value(key, entry) } if value.is_a?(Array) + return sanitize_url(value) if url_key?(key) value end + # @param key [Object] + # @return [Boolean] + def url_key?(key) + key_name = key.to_s + key_name == 'url' || key_name.end_with?('_url', '_urls') + end + # @param value [Object] # @return [Hash{Symbol=>Object}, Object] def sanitize_url(value) diff --git a/spec/html2rss/web/boot/setup_spec.rb b/spec/html2rss/web/boot/setup_spec.rb index 8682dbce..c5c2461f 100644 --- a/spec/html2rss/web/boot/setup_spec.rb +++ b/spec/html2rss/web/boot/setup_spec.rb @@ -5,13 +5,13 @@ require_relative '../../../../app' RSpec.describe Html2rss::Web::Boot::Setup do - describe '.call!' do - before do - allow(Html2rss::Web::EnvironmentValidator).to receive(:validate_environment!) - allow(Html2rss::Web::EnvironmentValidator).to receive(:validate_production_security!) - allow(Html2rss::Web::Flags).to receive(:validate!) - end + before do + allow(Html2rss::Web::EnvironmentValidator).to receive(:validate_environment!) + allow(Html2rss::Web::EnvironmentValidator).to receive(:validate_production_security!) + allow(Html2rss::Web::Flags).to receive(:validate!) + end + describe '.call!' do it 'validates environment state', :aggregate_failures do described_class.call! @@ -19,5 +19,16 @@ expect(Html2rss::Web::EnvironmentValidator).to have_received(:validate_production_security!).once expect(Html2rss::Web::Flags).to have_received(:validate!).once end + + it 'routes rack-timeout logs through the shared app logger' do + stub_const('Rack::Timeout::Logger', Class.new) + logger_holder = { value: nil } + Rack::Timeout::Logger.define_singleton_method(:logger=) { |value| logger_holder[:value] = value } + Rack::Timeout::Logger.define_singleton_method(:logger) { logger_holder[:value] } + + described_class.call! + + expect(Rack::Timeout::Logger.logger).to be(Html2rss::Web::AppLogger.logger) + end end end diff --git a/spec/html2rss/web/log_sanitizer_spec.rb b/spec/html2rss/web/log_sanitizer_spec.rb index fbe1562f..3c9de620 100644 --- a/spec/html2rss/web/log_sanitizer_spec.rb +++ b/spec/html2rss/web/log_sanitizer_spec.rb @@ -48,7 +48,7 @@ expected_url = { host: 'news.ycombinator.com', scheme: 'https', - hash: Digest::SHA256.hexdigest('https://news.ycombinator.com')[0..11] + hash: url_hash('https://news.ycombinator.com') } expect(described_class.sanitize_details(url: 'https://news.ycombinator.com')).to eq(url: expected_url) @@ -60,6 +60,14 @@ ) end + it 'sanitizes nested url fields when emitting shared log events' do + Html2rss::Web::LogEvent.emit(payload: nested_url_payload) + + payload = JSON.parse(io.string.lines.last, symbolize_names: true) + + expect(payload.slice(:url, :related_urls, :details)).to eq(expected_nested_url_payload) + end + it 'sanitizes security logger token usage fields' do Html2rss::Web::SecurityLogger.log_token_usage('very-secret-token', 'https://news.ycombinator.com', true) payload = JSON.parse(io.string.lines.last, symbolize_names: true) @@ -69,7 +77,7 @@ url: { host: 'news.ycombinator.com', scheme: 'https', - hash: Digest::SHA256.hexdigest('https://news.ycombinator.com')[0..11] + hash: url_hash('https://news.ycombinator.com') }, token_hash: Digest::SHA256.hexdigest('very-secret-token')[0..7] ) @@ -88,7 +96,7 @@ expect(observability_payload.dig(:details, :url)).to eq( host: 'news.ycombinator.com', scheme: 'https', - hash: Digest::SHA256.hexdigest('https://news.ycombinator.com')[0..11] + hash: url_hash('https://news.ycombinator.com') ) end @@ -103,4 +111,41 @@ state: 'completed' ) end + + private + + # @return [Hash{Symbol=>Object}] + def nested_url_payload + { + url: 'https://news.ycombinator.com', + related_urls: ['https://example.com/feed.xml'], + details: { url: 'https://lobste.rs/s/test' } + } + end + + # @return [Hash{Symbol=>Object}] + def expected_nested_url_payload + { + url: sanitized_url('news.ycombinator.com', 'https://news.ycombinator.com'), + related_urls: [ + sanitized_url('example.com', 'https://example.com/feed.xml') + ], + details: { + url: sanitized_url('lobste.rs', 'https://lobste.rs/s/test') + } + } + end + + # @param host [String] + # @param url [String] + # @return [Hash{Symbol=>String}] + def sanitized_url(host, url) + { host:, scheme: 'https', hash: url_hash(url) } + end + + # @param url [String] + # @return [String] + def url_hash(url) + Digest::SHA256.hexdigest(url)[0..11] + end end From 996fa53765a653cc353af62d831cb63b7fc98878 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sun, 22 Mar 2026 09:55:20 +0100 Subject: [PATCH 5/7] Align structured logging grammar --- app/web/config/environment_validator.rb | 49 +++++++++++++---- app/web/security/security_logger.rb | 12 ++--- app/web/telemetry/observability.rb | 4 +- .../web/environment_validator_spec.rb | 52 +++++++++++++++++++ spec/html2rss/web/log_sanitizer_spec.rb | 34 ++++++------ 5 files changed, 115 insertions(+), 36 deletions(-) diff --git a/app/web/config/environment_validator.rb b/app/web/config/environment_validator.rb index 36f0b18e..4d80ee1a 100644 --- a/app/web/config/environment_validator.rb +++ b/app/web/config/environment_validator.rb @@ -49,12 +49,17 @@ def auto_source_enabled? def set_development_key ENV['HTML2RSS_SECRET_KEY'] = 'development-default-key-not-for-production' - puts '⚠️ WARNING: Using default secret key for development/testing only!' - puts ' Set HTML2RSS_SECRET_KEY environment variable for production use.' + log_development_default_secret_key_warning + warn_lines( + 'WARNING: Using default secret key for development/testing only!', + 'Set HTML2RSS_SECRET_KEY environment variable for production use.' + ) + nil end def show_production_error - puts production_error_message + SecurityLogger.log_config_validation_failure('secret_key', 'Missing required secret key') + warn_lines(*production_error_message.lines(chomp: true)) exit 1 end @@ -79,9 +84,11 @@ def validate_secret_key! return unless secret == 'your-generated-secret-key-here' || secret.length < 32 SecurityLogger.log_config_validation_failure('secret_key', 'Invalid or weak secret key') - puts '❌ CRITICAL: Invalid secret key for production deployment!' - puts ' Secret key must be at least 32 characters and not the default placeholder.' - puts ' Generate a secure key: openssl rand -hex 32' + warn_lines( + 'CRITICAL: Invalid secret key for production deployment!', + 'Secret key must be at least 32 characters and not the default placeholder.', + 'Generate a secure key: openssl rand -hex 32' + ) exit 1 end @@ -90,11 +97,35 @@ def validate_account_configuration! weak_tokens = accounts.select { |acc| acc[:token].length < 16 } return unless weak_tokens.any? + handle_weak_account_tokens!(weak_tokens) + end + + # @param lines [Array] + # @return [void] + def warn_lines(*lines) + lines.each { |line| Kernel.warn(line) } + nil + end + + # @return [void] + def log_development_default_secret_key_warning + SecurityLogger.log_config_validation_failure( + 'secret_key', + 'Using development default secret key', + severity: :warn + ) + end + + # @param weak_tokens [ArrayString}>] + # @return [void] + def handle_weak_account_tokens!(weak_tokens) weak_usernames = weak_tokens.map { |acc| acc[:username] }.join(', ') SecurityLogger.log_config_validation_failure('account_tokens', "Weak tokens for users: #{weak_usernames}") - puts '❌ CRITICAL: Weak authentication tokens detected in production!' - puts ' All tokens must be at least 16 characters long.' - puts " Weak tokens found for users: #{weak_usernames}" + warn_lines( + 'CRITICAL: Weak authentication tokens detected in production!', + 'All tokens must be at least 16 characters long.', + "Weak tokens found for users: #{weak_usernames}" + ) exit 1 end end diff --git a/app/web/security/security_logger.rb b/app/web/security/security_logger.rb index d4e34332..ec7058c4 100644 --- a/app/web/security/security_logger.rb +++ b/app/web/security/security_logger.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require 'digest' -require 'time' module Html2rss module Web ## @@ -103,12 +102,13 @@ def log_blocked_request(ip, reason, endpoint) # Log configuration validation failure # @param component [String] component that failed validation # @param details [String] validation failure details + # @param severity [Symbol] # @return [void] - def log_config_validation_failure(component, details) + def log_config_validation_failure(component, details, severity: :error) log_event('config_validation_failure', { component: component, details: details - }, severity: :error) + }, severity: severity) end # Log lifecycle events for in-memory config/cache snapshots @@ -135,7 +135,7 @@ def log_event(event_type, data, severity: :warn) level: severity, payload: { security_event: event_type, - **data + details: data } ) rescue StandardError => error @@ -148,8 +148,8 @@ def log_event(event_type, data, severity: :warn) # @param event_type [String] type of security event # @param data [Hash] event data def handle_logging_error(error, event_type, data) - Kernel.warn("Security logging error: #{error.message}") - Kernel.warn("Security event: #{event_type} - #{data}") + Kernel.warn("Structured logging fallback: #{error.class}: #{error.message}") + Kernel.warn("component=security_logger security_event=#{event_type} details=#{data}") end end end diff --git a/app/web/telemetry/observability.rb b/app/web/telemetry/observability.rb index 73c38bc6..78eaae1f 100644 --- a/app/web/telemetry/observability.rb +++ b/app/web/telemetry/observability.rb @@ -26,8 +26,8 @@ def emit(event_name:, outcome:, details: {}, level: :info) # @param outcome [String] # @return [void] def handle_emit_error(error, event_name, outcome) - Kernel.warn("Observability emit error: #{error.message}") - Kernel.warn("event_name=#{event_name} outcome=#{outcome}") + Kernel.warn("Structured logging fallback: #{error.class}: #{error.message}") + Kernel.warn("component=observability event_name=#{event_name} outcome=#{outcome}") end # @param event_name [String] diff --git a/spec/html2rss/web/environment_validator_spec.rb b/spec/html2rss/web/environment_validator_spec.rb index 0a5ed65e..fa7e0248 100644 --- a/spec/html2rss/web/environment_validator_spec.rb +++ b/spec/html2rss/web/environment_validator_spec.rb @@ -4,8 +4,60 @@ require 'climate_control' require_relative '../../../app/web/config/environment_validator' +require_relative '../../../app/web/config/flags' +require_relative '../../../app/web/security/security_logger' RSpec.describe Html2rss::Web::EnvironmentValidator do + describe '.validate_environment!' do + it 'sets a development default secret key without exiting' do + allow(Html2rss::Web::SecurityLogger).to receive(:log_config_validation_failure) + allow(Kernel).to receive(:warn) + + ClimateControl.modify('RACK_ENV' => 'development', 'HTML2RSS_SECRET_KEY' => nil) do + described_class.validate_environment! + expect(ENV.fetch('HTML2RSS_SECRET_KEY')).to eq('development-default-key-not-for-production') + end + end + + it 'logs development default secret key warnings' do + allow(Html2rss::Web::SecurityLogger).to receive(:log_config_validation_failure) + allow(Kernel).to receive(:warn) + + ClimateControl.modify('RACK_ENV' => 'development', 'HTML2RSS_SECRET_KEY' => nil) do + described_class.validate_environment! + end + + expect(Html2rss::Web::SecurityLogger).to have_received(:log_config_validation_failure) + .with('secret_key', 'Using development default secret key', severity: :warn) + end + + it 'logs missing production secret key failures before exiting' do + allow(Html2rss::Web::SecurityLogger).to receive(:log_config_validation_failure) + allow(Kernel).to receive(:warn) + + ClimateControl.modify('RACK_ENV' => 'production', 'HTML2RSS_SECRET_KEY' => nil) do + expect { described_class.validate_environment! }.to raise_error(SystemExit) + end + + expect(Html2rss::Web::SecurityLogger).to have_received(:log_config_validation_failure) + .with('secret_key', 'Missing required secret key') + end + end + + describe '.validate_production_security!' do + it 'logs weak production secret keys before exiting' do + allow(Html2rss::Web::SecurityLogger).to receive(:log_config_validation_failure) + allow(Kernel).to receive(:warn) + + ClimateControl.modify('RACK_ENV' => 'production', 'HTML2RSS_SECRET_KEY' => 'short-secret') do + expect { described_class.validate_production_security! }.to raise_error(SystemExit) + end + + expect(Html2rss::Web::SecurityLogger).to have_received(:log_config_validation_failure) + .with('secret_key', 'Invalid or weak secret key') + end + end + describe '.auto_source_enabled?' do context 'when in development' do it 'defaults to enabled when flag is not set' do diff --git a/spec/html2rss/web/log_sanitizer_spec.rb b/spec/html2rss/web/log_sanitizer_spec.rb index 3c9de620..43537fb0 100644 --- a/spec/html2rss/web/log_sanitizer_spec.rb +++ b/spec/html2rss/web/log_sanitizer_spec.rb @@ -13,6 +13,13 @@ RSpec.describe Html2rss::Web::LogSanitizer do let(:io) { StringIO.new } let(:logger) { Logger.new(io).tap { |log| log.formatter = Html2rss::Web::AppLogger.send(:method, :format_entry) } } + let(:sanitized_url) do + { + host: 'news.ycombinator.com', + scheme: 'https', + hash: Digest::SHA256.hexdigest('https://news.ycombinator.com')[0..11] + } + end let(:context) do Html2rss::Web::RequestContext::Context.new( request_id: 'req-123', @@ -45,13 +52,7 @@ end it 'replaces logged urls with hashed host metadata' do - expected_url = { - host: 'news.ycombinator.com', - scheme: 'https', - hash: url_hash('https://news.ycombinator.com') - } - - expect(described_class.sanitize_details(url: 'https://news.ycombinator.com')).to eq(url: expected_url) + expect(described_class.sanitize_details(url: 'https://news.ycombinator.com')).to eq(url: sanitized_url) end it 'falls back to a hash for malformed urls' do @@ -72,14 +73,13 @@ Html2rss::Web::SecurityLogger.log_token_usage('very-secret-token', 'https://news.ycombinator.com', true) payload = JSON.parse(io.string.lines.last, symbolize_names: true) - expect(payload.slice(:path, :url, :token_hash)).to eq( + expect(payload.slice(:path, :details)).to eq( path: '/api/v1/feeds/[REDACTED]', - url: { - host: 'news.ycombinator.com', - scheme: 'https', - hash: url_hash('https://news.ycombinator.com') - }, - token_hash: Digest::SHA256.hexdigest('very-secret-token')[0..7] + details: { + url: sanitized_url, + token_hash: Digest::SHA256.hexdigest('very-secret-token')[0..7], + success: true + } ) end @@ -93,11 +93,7 @@ lines = io.string.lines.map { |line| JSON.parse(line, symbolize_names: true) } observability_payload = lines.first - expect(observability_payload.dig(:details, :url)).to eq( - host: 'news.ycombinator.com', - scheme: 'https', - hash: url_hash('https://news.ycombinator.com') - ) + expect(observability_payload.dig(:details, :url)).to eq(sanitized_url) end it 'formats rack-timeout logfmt as json' do From fb6e12317cf19a5bbacd5d6eb61ee2b6c9b87f6d Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sun, 22 Mar 2026 11:08:24 +0100 Subject: [PATCH 6/7] Fix logging fallback sanitization --- app/web/security/security_logger.rb | 3 +- app/web/telemetry/app_logger.rb | 16 +++++-- spec/html2rss/web/log_sanitizer_spec.rb | 57 ++++++++++++++++++++----- 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/app/web/security/security_logger.rb b/app/web/security/security_logger.rb index ec7058c4..bc0fce99 100644 --- a/app/web/security/security_logger.rb +++ b/app/web/security/security_logger.rb @@ -148,8 +148,9 @@ def log_event(event_type, data, severity: :warn) # @param event_type [String] type of security event # @param data [Hash] event data def handle_logging_error(error, event_type, data) + sanitized_data = LogSanitizer.sanitize_details(data) Kernel.warn("Structured logging fallback: #{error.class}: #{error.message}") - Kernel.warn("component=security_logger security_event=#{event_type} details=#{data}") + Kernel.warn("component=security_logger security_event=#{event_type} details=#{sanitized_data}") end end end diff --git a/app/web/telemetry/app_logger.rb b/app/web/telemetry/app_logger.rb index ae4ea68d..7281c5dc 100644 --- a/app/web/telemetry/app_logger.rb +++ b/app/web/telemetry/app_logger.rb @@ -52,17 +52,27 @@ def base_payload(severity, datetime) # @param message [Object] # @return [Hash{Symbol=>Object}] def normalize_message(message) - parsed_json(message) || parse_logfmt(message.to_s) || { message: message.to_s } + message_string = message.to_s + return parsed_json(message_string) if json_like?(message_string) + + parse_logfmt(message_string) || { message: message_string } end - # @param message [Object] + # @param message [String] # @return [Hash{Symbol=>Object}, nil] def parsed_json(message) - JSON.parse(message.to_s, symbolize_names: true) + JSON.parse(message, symbolize_names: true) rescue JSON::ParserError, TypeError nil end + # @param message [String] + # @return [Boolean] + def json_like?(message) + stripped = message.lstrip + stripped.start_with?('{', '[') + end + # @param message [String] # @return [Hash{Symbol=>Object}, nil] def parse_logfmt(message) diff --git a/spec/html2rss/web/log_sanitizer_spec.rb b/spec/html2rss/web/log_sanitizer_spec.rb index 43537fb0..1c15c237 100644 --- a/spec/html2rss/web/log_sanitizer_spec.rb +++ b/spec/html2rss/web/log_sanitizer_spec.rb @@ -12,8 +12,8 @@ RSpec.describe Html2rss::Web::LogSanitizer do let(:io) { StringIO.new } - let(:logger) { Logger.new(io).tap { |log| log.formatter = Html2rss::Web::AppLogger.send(:method, :format_entry) } } - let(:sanitized_url) do + let(:test_logger) { Logger.new(io).tap { |log| log.formatter = Html2rss::Web::AppLogger.send(:method, :format_entry) } } + let(:expected_news_url) do { host: 'news.ycombinator.com', scheme: 'https', @@ -36,7 +36,7 @@ Html2rss::Web::RequestContext.set!(context) Html2rss::Web::AppLogger.reset_logger! Html2rss::Web::SecurityLogger.reset_logger! - allow(Html2rss::Web::AppLogger).to receive(:logger).and_return(logger) + allow(Logger).to receive(:new).and_return(test_logger) end after do @@ -52,7 +52,7 @@ end it 'replaces logged urls with hashed host metadata' do - expect(described_class.sanitize_details(url: 'https://news.ycombinator.com')).to eq(url: sanitized_url) + expect(described_class.sanitize_details(url: 'https://news.ycombinator.com')).to eq(url: expected_news_url) end it 'falls back to a hash for malformed urls' do @@ -76,13 +76,22 @@ expect(payload.slice(:path, :details)).to eq( path: '/api/v1/feeds/[REDACTED]', details: { - url: sanitized_url, + url: expected_news_url, token_hash: Digest::SHA256.hexdigest('very-secret-token')[0..7], success: true } ) end + it 'sanitizes security logger fallback output when structured logging fails' do + allow(Html2rss::Web::LogEvent).to receive(:emit).and_raise(JSON::GeneratorError, 'boom') + allow(Kernel).to receive(:warn) + emit_failing_token_usage_log + + expect(Kernel).to have_received(:warn).with('Structured logging fallback: JSON::GeneratorError: boom') + expect(fallback_warning_line).to eq(expected_fallback_warning_line) + end + it 'sanitizes observability details' do Html2rss::Web::Observability.emit( event_name: 'feed.render', @@ -93,11 +102,11 @@ lines = io.string.lines.map { |line| JSON.parse(line, symbolize_names: true) } observability_payload = lines.first - expect(observability_payload.dig(:details, :url)).to eq(sanitized_url) + expect(observability_payload.dig(:details, :url)).to eq(expected_news_url) end it 'formats rack-timeout logfmt as json' do - logger.info('source=rack-timeout id=req-123 timeout=15000ms state=completed') + Html2rss::Web::AppLogger.logger.info('source=rack-timeout id=req-123 timeout=15000ms state=completed') payload = JSON.parse(io.string.lines.last, symbolize_names: true) expect(payload).to include( @@ -122,12 +131,12 @@ def nested_url_payload # @return [Hash{Symbol=>Object}] def expected_nested_url_payload { - url: sanitized_url('news.ycombinator.com', 'https://news.ycombinator.com'), + url: expected_sanitized_url('news.ycombinator.com', 'https://news.ycombinator.com'), related_urls: [ - sanitized_url('example.com', 'https://example.com/feed.xml') + expected_sanitized_url('example.com', 'https://example.com/feed.xml') ], details: { - url: sanitized_url('lobste.rs', 'https://lobste.rs/s/test') + url: expected_sanitized_url('lobste.rs', 'https://lobste.rs/s/test') } } end @@ -135,7 +144,7 @@ def expected_nested_url_payload # @param host [String] # @param url [String] # @return [Hash{Symbol=>String}] - def sanitized_url(host, url) + def expected_sanitized_url(host, url) { host:, scheme: 'https', hash: url_hash(url) } end @@ -144,4 +153,30 @@ def sanitized_url(host, url) def url_hash(url) Digest::SHA256.hexdigest(url)[0..11] end + + # @return [void] + def emit_failing_token_usage_log + Html2rss::Web::SecurityLogger.log_token_usage( + 'very-secret-token', + 'https://news.ycombinator.com/private/feed', + true + ) + end + + # @return [String] + def fallback_warning_line + warning_messages = RSpec::Mocks.space.proxy_for(Kernel).messages_arg_list.map(&:first) + warning_messages.find { |message| message.include?('component=security_logger') } + end + + # @return [Hash{Symbol=>String}] + def sanitized_fallback_url + expected_sanitized_url('news.ycombinator.com', 'https://news.ycombinator.com/private/feed') + end + + # @return [String] + def expected_fallback_warning_line + 'component=security_logger security_event=token_usage ' \ + "details={success: true, url: #{sanitized_fallback_url.inspect}, token_hash: \"01cadf39\"}" + end end From 2dc9b7bd7137fdf7dee5f99b82fdc3078793ce0e Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sun, 22 Mar 2026 13:56:05 +0100 Subject: [PATCH 7/7] feat: redaction only when path matches --- app/web/security/log_sanitizer.rb | 18 ++---------------- spec/html2rss/web/log_sanitizer_spec.rb | 6 ++++++ 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/app/web/security/log_sanitizer.rb b/app/web/security/log_sanitizer.rb index 3b79769e..6d4ff23c 100644 --- a/app/web/security/log_sanitizer.rb +++ b/app/web/security/log_sanitizer.rb @@ -8,7 +8,7 @@ module Web ## # Sanitizes request paths and log payloads before they are emitted. module LogSanitizer - FEED_TOKEN_ROUTE = %r{\A(/api/v1/feeds/)([^/?]+)\z} + FEED_TOKEN_ROUTE = %r{\A(/api/v1/feeds/)([^/?]+?)(\.(?:json|xml|rss))?\z} class << self # @param path [String, nil] @@ -16,11 +16,7 @@ class << self def sanitize_path(path) return if path.nil? - path_string = path.to_s - suffix = feed_suffix(path_string) - token_path = suffix ? path_string.delete_suffix(suffix) : path_string - - token_path.gsub(FEED_TOKEN_ROUTE, "\\1[REDACTED]#{suffix}") + path.to_s.gsub(FEED_TOKEN_ROUTE, '\1[REDACTED]\3') end # @param details [Hash] @@ -33,16 +29,6 @@ def sanitize_details(details) private - # @param path [String] - # @return [String, nil] - def feed_suffix(path) - return '.json' if path.end_with?('.json') - return '.xml' if path.end_with?('.xml') - return '.rss' if path.end_with?('.rss') - - nil - end - # @param key [Object] # @param value [Object] # @return [Object] diff --git a/spec/html2rss/web/log_sanitizer_spec.rb b/spec/html2rss/web/log_sanitizer_spec.rb index 1c15c237..b8653468 100644 --- a/spec/html2rss/web/log_sanitizer_spec.rb +++ b/spec/html2rss/web/log_sanitizer_spec.rb @@ -51,6 +51,12 @@ ).to eq('/api/v1/feeds/[REDACTED].xml') end + it 'leaves non-feed paths unchanged when they use supported suffixes', :aggregate_failures do + expect(described_class.sanitize_path('/api/v1/health.json')).to eq('/api/v1/health.json') + expect(described_class.sanitize_path('/api/v1/status.xml')).to eq('/api/v1/status.xml') + expect(described_class.sanitize_path('/feeds/public.rss')).to eq('/feeds/public.rss') + end + it 'replaces logged urls with hashed host metadata' do expect(described_class.sanitize_details(url: 'https://news.ycombinator.com')).to eq(url: expected_news_url) end