|
2 | 2 |
|
3 | 3 | require 'uri' |
4 | 4 | require_relative 'auth' |
| 5 | +require_relative 'xml_builder' |
5 | 6 |
|
6 | 7 | module Html2rss |
7 | 8 | module Web |
@@ -87,50 +88,64 @@ def generate_feed_from_stable_id(feed_id, token_data) |
87 | 88 | end |
88 | 89 |
|
# Generates a feed for +url+ and substitutes a warning feed when the result
# contains no items.
#
# @param url [String] page to build the feed from
# @param strategy [String, Symbol] request strategy name handed to call_strategy
# @return [Object] whatever call_strategy returned, or the result of
#   create_empty_feed_warning when the serialized feed has no <item> element
def generate_feed_content(url, strategy = 'ssrf_filter')
  feed_content = call_strategy(url, strategy)

  # Every object responds to #to_s, so no capability check is needed here.
  # A feed without any <item> may point to a content extraction problem.
  return create_empty_feed_warning(url, strategy) unless feed_content.to_s.include?('<item>')

  feed_content
end
| 104 | + |
# Builds the feed returned in place of a feed that has no items.
#
# @param url [String] page the empty feed was generated from
# @param strategy [String, Symbol] strategy that produced the empty feed
# @return [Object] result of XmlBuilder.build_empty_feed_warning
def create_empty_feed_warning(url, strategy)
  # extract_site_title yields nil when no title can be derived from the URL.
  XmlBuilder.build_empty_feed_warning(url: url, strategy: strategy, site_title: extract_site_title(url))
end
92 | 113 |
|
# rubocop:disable Metrics/MethodLength
# Assembles the Html2rss configuration for +url+ and runs Html2rss.feed on it.
#
# @param url [String] page to build the feed from
# @param strategy [String, Symbol] strategy name; converted with #to_sym
# @return [Object] whatever Html2rss.feed returns for the configuration
def call_strategy(url, strategy)
  strategy_name = strategy.to_sym
  channel = {
    url: url,
    title: extract_channel_title(url)
  }

  config = {
    stylesheets: [{ href: '/rss.xsl', type: 'text/xsl' }],
    strategy: strategy_name,
    channel: channel,
    # Left empty on purpose: lets Html2rss detect the page content automatically.
    auto_source: {}
  }

  Html2rss.feed(config)
end
# rubocop:enable Metrics/MethodLength
106 | 132 |
|
# Derives the channel title for +url+, falling back to 'RSS Feed' when the
# derived title is nil. Errors raised by Html2rss::Url are not rescued here.
#
# @param url [String] page the feed is generated from
# @return [Object] the value of channel_titleized, or 'RSS Feed' when that is nil/false
def extract_channel_title(url)
  derived_title = Html2rss::Url.for_channel(url).channel_titleized
  derived_title || 'RSS Feed'
end
111 | 136 |
|
# Best-effort lookup of a site title for +url+.
#
# @param url [String] page the feed is generated from
# @return [Object, nil] the value of channel_titleized, or nil when any
#   StandardError is raised while deriving it
def extract_site_title(url)
  channel_url = Html2rss::Url.for_channel(url)
  channel_url.channel_titleized
rescue StandardError
  # The title is optional for callers, so a failure here yields nil.
  nil
end
| 142 | + |
# Produces the feed shown when feed generation fails.
#
# @param message [String] error description forwarded to XmlBuilder
# @return [Object] result of XmlBuilder.build_error_feed
def error_feed(message)
  # Feed construction is delegated entirely to XmlBuilder.
  XmlBuilder.build_error_feed(message: message)
end
119 | 146 |
|
# Produces the feed shown when +url+ is refused.
#
# @param url [String] the rejected URL, forwarded to XmlBuilder
# @return [Object] result of XmlBuilder.build_access_denied_feed
def access_denied_feed(url)
  # Feed construction is delegated entirely to XmlBuilder.
  XmlBuilder.build_access_denied_feed(url)
end
135 | 150 | end |
136 | 151 | end |
|
0 commit comments