Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions examples/deployment/.env
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@ RACK_ENV=production
# Generate with: openssl rand -hex 32
HTML2RSS_SECRET_KEY=replace-with-64-hex-characters-generated-by-openssl-rand-hex-32

# Authenticated health endpoint token
# Required by the documented Compose stack.
# If you build a custom stack and probe only /api/v1/health/live and /api/v1/health/ready,
# you can omit this value.
HEALTH_CHECK_TOKEN=replace-with-strong-health-token
# Web UI / feed creation token
# Paste this into the web app when it asks for an access token.
HTML2RSS_ACCESS_TOKEN=replace-with-strong-access-token

# Auto source (optional; keep false unless you need automatic feed generation)
AUTO_SOURCE_ENABLED=false
# Automatic feed generation (creates feeds from a page URL; set to false to disable)
AUTO_SOURCE_ENABLED=true

# Observability (optional)
#SENTRY_DSN=
Expand Down
45 changes: 6 additions & 39 deletions examples/deployment/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,14 @@ services:
html2rss-web:
image: html2rss/web:1
restart: unless-stopped
ports:
- "127.0.0.1:4000:4000"
env_file:
- path: .env
required: false
environment:
PORT: 4000
BOTASAURUS_SCRAPER_URL: http://botasaurus:4010

botasaurus:
image: html2rss/botasaurus-scrape-api:latest
restart: unless-stopped

caddy:
image: caddy:2-alpine
depends_on:
- html2rss-web
command:
- caddy
- reverse-proxy
- --from
- ${CADDY_HOST}
- --to
- html2rss-web:4000
ports:
- "80:80"
- "443:443"
volumes:
- caddy_data:/data

watchtower:
image: containrrr/watchtower
depends_on:
- html2rss-web
- caddy
- botasaurus
command:
- --cleanup
- --interval
- "7200"
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
restart: unless-stopped

volumes:
caddy_data:
RACK_ENV: production
HTML2RSS_SECRET_KEY: ${HTML2RSS_SECRET_KEY}
HTML2RSS_ACCESS_TOKEN: ${HTML2RSS_ACCESS_TOKEN}
AUTO_SOURCE_ENABLED: ${AUTO_SOURCE_ENABLED}
3 changes: 2 additions & 1 deletion src/components/docs/AutoGenerationOptional.astro
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { Aside } from "@astrojs/starlight/components";
---

<Aside type="note" title="Automatic generation may be disabled">
The direct `Create a feed` workflow is not enabled on every deployment. If you want that path, continue with
Included feeds are the fallback path when the packaged catalog already covers your site. If you want the
primary page-URL flow on your own instance, continue with
<a href="/web-application/how-to/use-automatic-feed-generation/">Use automatic feed generation</a>.
</Aside>
47 changes: 10 additions & 37 deletions src/components/docs/DockerComposeSnippet.astro
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
import { Code } from "@astrojs/starlight/components";
import { botasaurusImage, browserlessImage, caddyImage, watchtowerImage, webImage } from "../../data/docker";
import { botasaurusImage, caddyImage, watchtowerImage, webImage } from "../../data/docker";

interface Props {
variant: "minimal" | "productionCaddy" | "secure" | "watchtower" | "resourceGuardrails";
Expand All @@ -22,25 +22,14 @@ const snippets: Record<Props["variant"], string> = {
RACK_ENV: production
PORT: 4000
HTML2RSS_SECRET_KEY: \${HTML2RSS_SECRET_KEY:?set HTML2RSS_SECRET_KEY}
HEALTH_CHECK_TOKEN: \${HEALTH_CHECK_TOKEN:?set HEALTH_CHECK_TOKEN}
HTML2RSS_ACCESS_TOKEN: \${HTML2RSS_ACCESS_TOKEN:?set HTML2RSS_ACCESS_TOKEN}
SENTRY_DSN: \${SENTRY_DSN:-}
BROWSERLESS_IO_WEBSOCKET_URL: ws://browserless:4002
BROWSERLESS_IO_API_TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}
AUTO_SOURCE_ENABLED: "true"
BOTASAURUS_SCRAPER_URL: http://botasaurus:4010

botasaurus:
image: ${botasaurusImage}
restart: unless-stopped

browserless:
image: "${browserlessImage}"
restart: unless-stopped
ports:
- "127.0.0.1:4002:4002"
environment:
PORT: 4002
CONCURRENT: 10
TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}`,
restart: unless-stopped`,
productionCaddy: `services:
caddy:
image: ${caddyImage}
Expand Down Expand Up @@ -69,23 +58,15 @@ const snippets: Record<Props["variant"], string> = {
PORT: 4000
HTML2RSS_SECRET_KEY: \${HTML2RSS_SECRET_KEY:?set HTML2RSS_SECRET_KEY}
HEALTH_CHECK_TOKEN: \${HEALTH_CHECK_TOKEN:?set HEALTH_CHECK_TOKEN}
HTML2RSS_ACCESS_TOKEN: \${HTML2RSS_ACCESS_TOKEN:?set HTML2RSS_ACCESS_TOKEN}
AUTO_SOURCE_ENABLED: "true"
SENTRY_DSN: \${SENTRY_DSN:-}
BROWSERLESS_IO_WEBSOCKET_URL: ws://browserless:4002
BROWSERLESS_IO_API_TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}
BOTASAURUS_SCRAPER_URL: http://botasaurus:4010

botasaurus:
image: ${botasaurusImage}
restart: unless-stopped

browserless:
image: "${browserlessImage}"
restart: unless-stopped
environment:
PORT: 4002
CONCURRENT: 10
TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}

volumes:
caddy_data:`,
secure: `services:
Expand All @@ -100,22 +81,14 @@ volumes:
PORT: 4000
HTML2RSS_SECRET_KEY: \${HTML2RSS_SECRET_KEY:?set HTML2RSS_SECRET_KEY}
HEALTH_CHECK_TOKEN: \${HEALTH_CHECK_TOKEN:?set HEALTH_CHECK_TOKEN}
HTML2RSS_ACCESS_TOKEN: \${HTML2RSS_ACCESS_TOKEN:?set HTML2RSS_ACCESS_TOKEN}
AUTO_SOURCE_ENABLED: "true"
SENTRY_DSN: \${SENTRY_DSN:-}
BROWSERLESS_IO_WEBSOCKET_URL: ws://browserless:4002
BROWSERLESS_IO_API_TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}
BOTASAURUS_SCRAPER_URL: http://botasaurus:4010

botasaurus:
image: ${botasaurusImage}
restart: unless-stopped

browserless:
image: "${browserlessImage}"
restart: unless-stopped
environment:
PORT: 4002
CONCURRENT: 10
TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}`,
restart: unless-stopped`,
watchtower: `services:
watchtower:
image: ${watchtowerImage}
Expand All @@ -124,7 +97,7 @@ volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
# Optional for private registries only:
# - "\${HOME}/.docker/config.json:/config.json:ro"
command: --cleanup --interval 7200 html2rss-web botasaurus browserless caddy`,
command: --cleanup --interval 7200 html2rss-web botasaurus caddy`,
resourceGuardrails: `services:
html2rss-web:
image: ${webImage}
Expand Down
2 changes: 2 additions & 0 deletions src/content/docs/feed-directory/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import FeedDirectory from "../../../components/FeedDirectory.astro";

---

Need the main onboarding path first? Start with [Getting Started](/web-application/getting-started) and create a feed from your own page URL. The directory below is the packaged fallback/catalog path.

Need a different instance? You can use the built-in default, self-host your own, or find more options on the [community-run wiki](https://github.com/html2rss/html2rss-web/wiki/Instances).

[🚀 Host Your Own Instance (and share it!)](/web-application/how-to/deployment)
Expand Down
11 changes: 6 additions & 5 deletions src/content/docs/get-involved/self-hosting.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,20 @@ sidebar:
order: 3
---

This page is the short routing point for self-hosting. The current setup and deployment instructions live under the `html2rss-web` docs so the Docker, token, and Browserless guidance only exists in one place.
This page is the short routing point for self-hosting. The current setup and deployment instructions live under the `html2rss-web` docs so that the Docker, `.env`, token, and generated-feed guidance exists in only one place.

## Recommended Path

1. **[Run html2rss-web locally](/web-application/getting-started/)** to verify your own instance with an included feed first.
1. **[Run html2rss-web locally](/web-application/getting-started/)** to verify your own instance with a generated feed first.
2. **[Deploy html2rss-web to production](/web-application/how-to/deployment/)** when you are ready to expose or operate it.
3. **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)** only if you want the token-gated page-URL workflow.
3. **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)** to keep the token-gated page-URL workflow enabled on your instance.

## What To Expect

- `html2rss-web` is the recommended self-hosted product surface.
- Included feeds are the lowest-maintenance way to prove a deployment.
- Automatic feed generation is disabled by default in production.
- Automatic feed generation is the main self-hosted onboarding flow.
- Included feeds are the packaged fallback path when that catalog already covers the site you want.
- Automatic feed generation stays disabled in production until you set `AUTO_SOURCE_ENABLED=true`.
- The generated API contract is published as OpenAPI at `/openapi.yaml`.
- Custom config work belongs in the core `html2rss` docs and JSON Schema.

Expand Down
11 changes: 6 additions & 5 deletions src/content/docs/getting-started.mdx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: "Getting Started"
description: "Start html2rss-web locally, verify one feed, and decide when to enable automatic generation or move to custom configs."
description: "Start html2rss-web locally, paste your own URL, enter one token, and open the generated feed."
sidebar:
order: 1
---
Expand All @@ -16,14 +16,15 @@ If you want the recommended path, go to [Run html2rss-web with Docker](/web-appl
That guide is the canonical setup flow for:

- running `html2rss-web` locally
- confirming the interface is working
- opening a known feed URL
- deciding when to use automatic generation or custom configs
- creating a feed from your own page URL
- entering the token from your local setup
- choosing fallback or custom-config paths only when needed

## Quick Shortcuts

- **[Run html2rss-web with Docker](/web-application/getting-started)**: recommended first step
- **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)**: enable direct feed creation from a page URL when you want that workflow
- **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)**: learn how the default page-URL workflow creates a feed
- **[Use the included configs](/web-application/how-to/use-included-configs/)**: fallback when the packaged catalog already covers your site
- **[Create Custom Feeds](/creating-custom-feeds)**: write configs when you need more control
- **[Troubleshooting Guide](/troubleshooting/troubleshooting)**: fix startup or extraction problems

Expand Down
17 changes: 9 additions & 8 deletions src/content/docs/index.mdx
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
---
title: "Turn Any Website Into an RSS Feed"
description: "Run html2rss-web with Docker, verify one feed, then enable automatic generation or move to custom configs when you need more control."
description: "Run html2rss-web with Docker, paste your own page URL, enter one token, and open the generated feed."
---

Run `html2rss-web` with Docker, verify one feed from your own instance, then decide whether you need automatic generation or custom configs.
Run `html2rss-web` with Docker, paste your own page URL, enter one access token, and open the generated feed from your own instance.

## Start Here

Expand All @@ -12,9 +12,9 @@ Run `html2rss-web` with Docker, verify one feed from your own instance, then dec
That guide is the canonical onboarding flow for:

- starting a local instance
- verifying the web interface
- opening a known feed URL
- choosing the next path
- creating a generated feed from your own page URL
- entering the token from your local setup
- choosing the fallback or advanced path only when needed

## What is html2rss?

Expand All @@ -30,7 +30,7 @@ Most people should start with the web application:
### I want a working instance first

1. **[Run html2rss-web with Docker](/web-application/getting-started)**: recommended starting path
2. **[Use the included configs](/web-application/how-to/use-included-configs/)**: optional guide for the embedded feed set
2. **[Use the included configs](/web-application/how-to/use-included-configs/)**: fallback guide when the packaged feed set already covers your site

### I need more control

Expand All @@ -55,8 +55,9 @@ Most people should start with the web application:
## Practical Notes

- Start with Docker, not a public instance.
- Verify the deployment with one known feed first.
- Enable automatic generation only when you want the direct page-URL workflow and are ready to allow it on your self-hosted instance.
- Start with your own listing, newsroom, changelog, or updates URL.
- Automatic page-to-feed generation is the normal path.
- Use included configs when the packaged catalog already covers your site.
- Move to custom configs when you need a stable, reviewable setup.

**Need help?** Continue to the [troubleshooting guide](/troubleshooting/troubleshooting) or join [GitHub Discussions](https://github.com/orgs/html2rss/discussions).
6 changes: 4 additions & 2 deletions src/content/docs/troubleshooting/troubleshooting.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,16 @@ If you are getting a "command not found" error, try the following:

### Authentication Errors

- **401 Unauthorized when creating feeds:** The create-feed API expects a bearer token. Re-enter a valid access token in the UI or send `Authorization: Bearer ...` to `POST /api/v1/feeds`.
- **401 Unauthorized when creating feeds:** Re-enter the same access token you set as `HTML2RSS_ACCESS_TOKEN` for your instance.
- **403 Forbidden when creating feeds:** Automatic feed generation may be disabled (`AUTO_SOURCE_ENABLED=false`) or the requested URL may not be allowed for the authenticated account.
- **500 Internal Server Error:** Check the application logs for detailed error information.
- **Health endpoint failures:** Use `GET /api/v1/health/live`, `GET /api/v1/health/ready`, or authenticated `GET /api/v1/health` depending on which probe you are testing.

### Feed Problems

- Some sites may require JavaScript rendering; ensure the `browserless` service is running
- Try a more specific listing, newsroom, changelog, or updates URL before changing infrastructure
- If your first-run stack includes Botasaurus, ensure the `botasaurus` service is running
- Add Browserless later only when harder sites prove they need it
- Check the feed configuration in `feeds.yml` for typos or invalid selectors
- Look for parsing errors in the logs:
<Code code={`docker compose logs html2rss-web`} lang="bash" />
Expand Down
Loading
Loading