diff --git a/examples/deployment/.env b/examples/deployment/.env
index 62f8201d..21edc3ba 100644
--- a/examples/deployment/.env
+++ b/examples/deployment/.env
@@ -8,14 +8,12 @@ RACK_ENV=production
# Generate with: openssl rand -hex 32
HTML2RSS_SECRET_KEY=replace-with-64-hex-characters-generated-by-openssl-rand-hex-32
-# Authenticated health endpoint token
-# Required by the documented Compose stack.
-# If you build a custom stack and probe only /api/v1/health/live and /api/v1/health/ready,
-# you can omit this value.
-HEALTH_CHECK_TOKEN=replace-with-strong-health-token
+# Web UI / feed creation token
+# Paste this into the web app when it asks for an access token.
+HTML2RSS_ACCESS_TOKEN=replace-with-strong-access-token
-# Auto source (optional; keep false unless you need automatic feed generation)
-AUTO_SOURCE_ENABLED=false
+# Automatic feed generation
+AUTO_SOURCE_ENABLED=true
# Observability (optional)
#SENTRY_DSN=
diff --git a/examples/deployment/docker-compose.yml b/examples/deployment/docker-compose.yml
index 3774a619..96e8cc73 100644
--- a/examples/deployment/docker-compose.yml
+++ b/examples/deployment/docker-compose.yml
@@ -2,47 +2,14 @@ services:
html2rss-web:
image: html2rss/web:1
restart: unless-stopped
+ ports:
+ - "127.0.0.1:4000:4000"
env_file:
- path: .env
required: false
environment:
PORT: 4000
- BOTASAURUS_SCRAPER_URL: http://botasaurus:4010
-
- botasaurus:
- image: html2rss/botasaurus-scrape-api:latest
- restart: unless-stopped
-
- caddy:
- image: caddy:2-alpine
- depends_on:
- - html2rss-web
- command:
- - caddy
- - reverse-proxy
- - --from
- - ${CADDY_HOST}
- - --to
- - html2rss-web:4000
- ports:
- - "80:80"
- - "443:443"
- volumes:
- - caddy_data:/data
-
- watchtower:
- image: containrrr/watchtower
- depends_on:
- - html2rss-web
- - caddy
- - botasaurus
- command:
- - --cleanup
- - --interval
- - "7200"
- volumes:
- - /var/run/docker.sock:/var/run/docker.sock:ro
- restart: unless-stopped
-
-volumes:
- caddy_data:
+ RACK_ENV: production
+ HTML2RSS_SECRET_KEY: ${HTML2RSS_SECRET_KEY}
+ HTML2RSS_ACCESS_TOKEN: ${HTML2RSS_ACCESS_TOKEN}
+ AUTO_SOURCE_ENABLED: ${AUTO_SOURCE_ENABLED}
diff --git a/src/components/docs/AutoGenerationOptional.astro b/src/components/docs/AutoGenerationOptional.astro
index 982682bb..796e13d3 100644
--- a/src/components/docs/AutoGenerationOptional.astro
+++ b/src/components/docs/AutoGenerationOptional.astro
@@ -3,6 +3,7 @@ import { Aside } from "@astrojs/starlight/components";
---
diff --git a/src/components/docs/DockerComposeSnippet.astro b/src/components/docs/DockerComposeSnippet.astro
index 0ced6c48..58960c5c 100644
--- a/src/components/docs/DockerComposeSnippet.astro
+++ b/src/components/docs/DockerComposeSnippet.astro
@@ -1,6 +1,6 @@
---
import { Code } from "@astrojs/starlight/components";
-import { botasaurusImage, browserlessImage, caddyImage, watchtowerImage, webImage } from "../../data/docker";
+import { botasaurusImage, caddyImage, watchtowerImage, webImage } from "../../data/docker";
interface Props {
variant: "minimal" | "productionCaddy" | "secure" | "watchtower" | "resourceGuardrails";
@@ -22,25 +22,14 @@ const snippets: Record = {
RACK_ENV: production
PORT: 4000
HTML2RSS_SECRET_KEY: \${HTML2RSS_SECRET_KEY:?set HTML2RSS_SECRET_KEY}
- HEALTH_CHECK_TOKEN: \${HEALTH_CHECK_TOKEN:?set HEALTH_CHECK_TOKEN}
+ HTML2RSS_ACCESS_TOKEN: \${HTML2RSS_ACCESS_TOKEN:?set HTML2RSS_ACCESS_TOKEN}
SENTRY_DSN: \${SENTRY_DSN:-}
- BROWSERLESS_IO_WEBSOCKET_URL: ws://browserless:4002
- BROWSERLESS_IO_API_TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}
+ AUTO_SOURCE_ENABLED: "true"
BOTASAURUS_SCRAPER_URL: http://botasaurus:4010
botasaurus:
image: ${botasaurusImage}
- restart: unless-stopped
-
- browserless:
- image: "${browserlessImage}"
- restart: unless-stopped
- ports:
- - "127.0.0.1:4002:4002"
- environment:
- PORT: 4002
- CONCURRENT: 10
- TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}`,
+ restart: unless-stopped`,
productionCaddy: `services:
caddy:
image: ${caddyImage}
@@ -69,23 +58,15 @@ const snippets: Record = {
PORT: 4000
HTML2RSS_SECRET_KEY: \${HTML2RSS_SECRET_KEY:?set HTML2RSS_SECRET_KEY}
HEALTH_CHECK_TOKEN: \${HEALTH_CHECK_TOKEN:?set HEALTH_CHECK_TOKEN}
+ HTML2RSS_ACCESS_TOKEN: \${HTML2RSS_ACCESS_TOKEN:?set HTML2RSS_ACCESS_TOKEN}
+ AUTO_SOURCE_ENABLED: "true"
SENTRY_DSN: \${SENTRY_DSN:-}
- BROWSERLESS_IO_WEBSOCKET_URL: ws://browserless:4002
- BROWSERLESS_IO_API_TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}
BOTASAURUS_SCRAPER_URL: http://botasaurus:4010
botasaurus:
image: ${botasaurusImage}
restart: unless-stopped
- browserless:
- image: "${browserlessImage}"
- restart: unless-stopped
- environment:
- PORT: 4002
- CONCURRENT: 10
- TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}
-
volumes:
caddy_data:`,
secure: `services:
@@ -100,22 +81,14 @@ volumes:
PORT: 4000
HTML2RSS_SECRET_KEY: \${HTML2RSS_SECRET_KEY:?set HTML2RSS_SECRET_KEY}
HEALTH_CHECK_TOKEN: \${HEALTH_CHECK_TOKEN:?set HEALTH_CHECK_TOKEN}
+ HTML2RSS_ACCESS_TOKEN: \${HTML2RSS_ACCESS_TOKEN:?set HTML2RSS_ACCESS_TOKEN}
+ AUTO_SOURCE_ENABLED: "true"
SENTRY_DSN: \${SENTRY_DSN:-}
- BROWSERLESS_IO_WEBSOCKET_URL: ws://browserless:4002
- BROWSERLESS_IO_API_TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}
BOTASAURUS_SCRAPER_URL: http://botasaurus:4010
botasaurus:
image: ${botasaurusImage}
- restart: unless-stopped
-
- browserless:
- image: "${browserlessImage}"
- restart: unless-stopped
- environment:
- PORT: 4002
- CONCURRENT: 10
- TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}`,
+ restart: unless-stopped`,
watchtower: `services:
watchtower:
image: ${watchtowerImage}
@@ -124,7 +97,7 @@ volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
# Optional for private registries only:
# - "\${HOME}/.docker/config.json:/config.json:ro"
- command: --cleanup --interval 7200 html2rss-web botasaurus browserless caddy`,
+ command: --cleanup --interval 7200 html2rss-web botasaurus caddy`,
resourceGuardrails: `services:
html2rss-web:
image: ${webImage}
diff --git a/src/content/docs/feed-directory/index.mdx b/src/content/docs/feed-directory/index.mdx
index 4b75c0c1..92700aab 100644
--- a/src/content/docs/feed-directory/index.mdx
+++ b/src/content/docs/feed-directory/index.mdx
@@ -14,6 +14,8 @@ import FeedDirectory from "../../../components/FeedDirectory.astro";
---
+Need the main onboarding path first? Start with [Getting Started](/web-application/getting-started) and create a feed from your own page URL. The directory below is the packaged fallback/catalog path.
+
Need a different instance? You can use the built-in default, self-host your own, or find more options on the [community-run wiki](https://github.com/html2rss/html2rss-web/wiki/Instances).
[🚀 Host Your Own Instance (and share it!)](/web-application/how-to/deployment)
diff --git a/src/content/docs/get-involved/self-hosting.mdx b/src/content/docs/get-involved/self-hosting.mdx
index 111768cb..1468f265 100644
--- a/src/content/docs/get-involved/self-hosting.mdx
+++ b/src/content/docs/get-involved/self-hosting.mdx
@@ -5,19 +5,20 @@ sidebar:
order: 3
---
-This page is the short routing point for self-hosting. The current setup and deployment instructions live under the `html2rss-web` docs so the Docker, token, and Browserless guidance only exists in one place.
+This page is the short routing point for self-hosting. The current setup and deployment instructions live under the `html2rss-web` docs so the Docker, `.env`, token, and generated-feed guidance only exists in one place.
## Recommended Path
-1. **[Run html2rss-web locally](/web-application/getting-started/)** to verify your own instance with an included feed first.
+1. **[Run html2rss-web locally](/web-application/getting-started/)** to verify your own instance with a generated feed first.
2. **[Deploy html2rss-web to production](/web-application/how-to/deployment/)** when you are ready to expose or operate it.
-3. **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)** only if you want the token-gated page-URL workflow.
+3. **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)** to keep the token-gated page-URL workflow enabled on your instance.
## What To Expect
- `html2rss-web` is the recommended self-hosted product surface.
-- Included feeds are the lowest-maintenance way to prove a deployment.
-- Automatic feed generation is disabled by default in production.
+- Automatic feed generation is the main self-hosted onboarding flow.
+- Included feeds are the packaged fallback path when that catalog already covers the site you want.
+- In production, automatic feed generation stays disabled until you set `AUTO_SOURCE_ENABLED=true`.
- The generated API contract is published as OpenAPI at `/openapi.yaml`.
- Custom config work belongs in the core `html2rss` docs and JSON Schema.
diff --git a/src/content/docs/getting-started.mdx b/src/content/docs/getting-started.mdx
index aa60c728..6ee1b207 100644
--- a/src/content/docs/getting-started.mdx
+++ b/src/content/docs/getting-started.mdx
@@ -1,6 +1,6 @@
---
title: "Getting Started"
-description: "Start html2rss-web locally, verify one feed, and decide when to enable automatic generation or move to custom configs."
+description: "Start html2rss-web locally, paste your own URL, enter one token, and open the generated feed."
sidebar:
order: 1
---
@@ -16,14 +16,15 @@ If you want the recommended path, go to [Run html2rss-web with Docker](/web-appl
That guide is the canonical setup flow for:
- running `html2rss-web` locally
-- confirming the interface is working
-- opening a known feed URL
-- deciding when to use automatic generation or custom configs
+- creating a feed from your own page URL
+- entering the token from your local setup
+- choosing fallback or custom-config paths only when needed
## Quick Shortcuts
- **[Run html2rss-web with Docker](/web-application/getting-started)**: recommended first step
-- **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)**: enable direct feed creation from a page URL when you want that workflow
+- **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)**: understand the normal direct page-URL workflow
+- **[Use the included configs](/web-application/how-to/use-included-configs/)**: fallback when the packaged catalog already covers your site
- **[Create Custom Feeds](/creating-custom-feeds)**: write configs when you need more control
- **[Troubleshooting Guide](/troubleshooting/troubleshooting)**: fix startup or extraction problems
diff --git a/src/content/docs/index.mdx b/src/content/docs/index.mdx
index a98d87de..b05e6c07 100644
--- a/src/content/docs/index.mdx
+++ b/src/content/docs/index.mdx
@@ -1,9 +1,9 @@
---
title: "Turn Any Website Into an RSS Feed"
-description: "Run html2rss-web with Docker, verify one feed, then enable automatic generation or move to custom configs when you need more control."
+description: "Run html2rss-web with Docker, paste your own page URL, enter one token, and open the generated feed."
---
-Run `html2rss-web` with Docker, verify one feed from your own instance, then decide whether you need automatic generation or custom configs.
+Run `html2rss-web` with Docker, paste your own page URL, enter one access token, and open the generated feed from your own instance.
## Start Here
@@ -12,9 +12,9 @@ Run `html2rss-web` with Docker, verify one feed from your own instance, then dec
That guide is the canonical onboarding flow for:
- starting a local instance
-- verifying the web interface
-- opening a known feed URL
-- choosing the next path
+- creating a generated feed from your own page URL
+- entering the token from your local setup
+- choosing the fallback or advanced path only when needed
## What is html2rss?
@@ -30,7 +30,7 @@ Most people should start with the web application:
### I want a working instance first
1. **[Run html2rss-web with Docker](/web-application/getting-started)**: recommended starting path
-2. **[Use the included configs](/web-application/how-to/use-included-configs/)**: optional guide for the embedded feed set
+2. **[Use the included configs](/web-application/how-to/use-included-configs/)**: fallback guide when the packaged feed set already covers your site
### I need more control
@@ -55,8 +55,9 @@ Most people should start with the web application:
## Practical Notes
- Start with Docker, not a public instance.
-- Verify the deployment with one known feed first.
-- Enable automatic generation only when you want the direct page-URL workflow and are ready to allow it on your self-hosted instance.
+- Start with your own listing, newsroom, changelog, or updates URL.
+- Automatic page-to-feed generation is the normal path.
+- Use included configs when the packaged catalog already covers your site.
- Move to custom configs when you need a stable, reviewable setup.
**Need help?** Continue to the [troubleshooting guide](/troubleshooting/troubleshooting) or join [GitHub Discussions](https://github.com/orgs/html2rss/discussions).
diff --git a/src/content/docs/troubleshooting/troubleshooting.mdx b/src/content/docs/troubleshooting/troubleshooting.mdx
index dcb11e2f..0dc53f8a 100644
--- a/src/content/docs/troubleshooting/troubleshooting.mdx
+++ b/src/content/docs/troubleshooting/troubleshooting.mdx
@@ -144,14 +144,16 @@ If you are getting a "command not found" error, try the following:
### Authentication Errors
-- **401 Unauthorized when creating feeds:** The create-feed API expects a bearer token. Re-enter a valid access token in the UI or send `Authorization: Bearer ...` to `POST /api/v1/feeds`.
+- **401 Unauthorized when creating feeds:** Re-enter the same access token you set as `HTML2RSS_ACCESS_TOKEN` for your instance.
- **403 Forbidden when creating feeds:** Automatic feed generation may be disabled (`AUTO_SOURCE_ENABLED=false`) or the requested URL may not be allowed for the authenticated account.
- **500 Internal Server Error:** Check the application logs for detailed error information.
- **Health endpoint failures:** Use `GET /api/v1/health/live`, `GET /api/v1/health/ready`, or authenticated `GET /api/v1/health` depending on which probe you are testing.
### Feed Problems
-- Some sites may require JavaScript rendering; ensure the `browserless` service is running
+- Try a more specific listing, newsroom, changelog, or updates URL before changing infrastructure
+- If your first-run stack includes Botasaurus, ensure the `botasaurus` service is running
+- Add Browserless later only when harder sites prove they need it
- Check the feed configuration in `feeds.yml` for typos or invalid selectors
- Look for parsing errors in the logs:
diff --git a/src/content/docs/web-application/getting-started.mdx b/src/content/docs/web-application/getting-started.mdx
index 00025fb6..9d22f105 100644
--- a/src/content/docs/web-application/getting-started.mdx
+++ b/src/content/docs/web-application/getting-started.mdx
@@ -1,105 +1,114 @@
---
title: "Getting Started"
-description: "Run html2rss-web locally with Docker, verify the interface, and open your first included feed."
+description: "Run html2rss-web locally with Docker, paste your own page URL, enter one token, and open the generated feed."
sidebar:
order: 2
---
import { Code } from "@astrojs/starlight/components";
-import AutoGenerationOptional from "../../../components/docs/AutoGenerationOptional.astro";
import MinimalDockerCompose from "../../../components/docs/MinimalDockerCompose.astro";
-Run `html2rss-web` locally with Docker and verify one included feed before enabling direct feed generation.
+Run `html2rss-web` locally with Docker, then create your first feed from your own page URL.
## What You Will Have When This Works
After this guide, you should have:
- `html2rss-web` running at `http://localhost:4000`
-- the web interface loading correctly
-- a first included feed URL you can copy into your reader
-- a clear path to either token-gated feed generation or custom configs
+- one access token stored in your local `.env`
+- a generated feed URL from your own page URL
+- a clear fallback path when direct generation is not the right fit
-## Installation Guide
-
-This guide uses a local Docker Compose stack.
-
-### What You'll Need
+## What You'll Need
- **Docker**
- **About 10 minutes**
If you do not already have Docker, [install it first](https://docs.docker.com/get-started/).
-### Step 1: Create a Folder
-
-Create a new folder for `html2rss-web`:
+## Step 1: Create a Folder
-### Step 2: Create a Minimal Configuration File
+## Step 2: Copy `docker-compose.yml`
-Create a file called `docker-compose.yml` in that folder and start with the minimal local stack:
+Create `docker-compose.yml` with the minimal local stack:
-Add automatic updates, reverse proxying, or your own config file after this first run works.
+This first-run stack keeps the path narrow:
-### Step 3: Start html2rss-web
+- `html2rss-web`
+- `botasaurus`
+- automatic feed generation enabled
+- no reverse proxy
+- no Watchtower
+- no Browserless
+- no health-check token setup
-Create a `.env` file in the same folder (minimum required values for this stack):
+## Step 3: Copy `.env`
+
+Create `.env` beside `docker-compose.yml`:
.env <
-Then run:
+## Step 4: Start the Stack
## First Success Check
1. Open `http://localhost:4000`
-2. Confirm the web interface loads
-3. Open one of the included feed URLs from your own instance:
- - `http://localhost:4000/microsoft.com/azure-products.rss`
- - `http://localhost:4000/phys.org/weekly.rss`
- - `http://localhost:4000/softwareleadweekly.com/issues.rss`
-4. Confirm the feed opens
-5. Copy that feed URL into your reader if you want to keep it
+2. Paste your own page URL into `Page URL`
+3. Start with a listing, newsroom, changelog, releases, or updates page instead of a homepage
+4. Enter the access token from `.env` when prompted
+5. Open the generated feed URL
+
+If that works, your instance is ready for the core `url -> rss` workflow.
+
+## If The First URL Is Poor
+
+Automatic mode works best when the input URL is already a page that lists updates.
+
+Higher-success inputs:
+
+- newsroom or press pages
+- changelog or release pages
+- category, tag, archive, or listing pages
-If that works, the local app and included-feed path are ready.
+Lower-success inputs:
-## What Changes If You Enable Feed Generation
+- generic homepages
+- search pages
+- app-shell entry pages
-Automatic feed generation is off by default in production. When you enable it later:
+When output quality is poor, change the input URL first before assuming setup is broken.
-- the web app creates feeds through `POST /api/v1/feeds`
-- that API requires a bearer token
-- the UI starts with `faraday` and automatically retries once with `browserless` when appropriate
-- Browserless still needs to be configured for JavaScript-heavy pages
+## Fallback Path
-If you are integrating this flow programmatically, the generated OpenAPI is available at `/openapi.yaml`.
+If the site you want is already in the packaged catalog, or direct generation is not the best fit, use the built-in feed set from your own instance.
-
+- [Use the included configs](/web-application/how-to/use-included-configs/)
+- [Feed Directory](/feed-directory/)
## Next Steps
-1. **[Use the included configs](/web-application/how-to/use-included-configs/)**: understand how built-in feed paths work
-2. **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)**: enable direct feed creation from page URLs when you want that workflow
-3. **[Create Custom Feeds](/creating-custom-feeds)**: write your own configs when you need reviewable extraction rules
+1. **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)**: understand the normal page-URL flow in more detail
+2. **[Use the included configs](/web-application/how-to/use-included-configs/)**: use the fallback catalog when it already covers your site
+3. **[Create Custom Feeds](/creating-custom-feeds)**: write your own config when you need reviewable extraction rules
4. **[Need help?](/troubleshooting/troubleshooting)**: troubleshoot startup and extraction problems
diff --git a/src/content/docs/web-application/how-to/deployment.mdx b/src/content/docs/web-application/how-to/deployment.mdx
index 3fd26161..d1d00e15 100644
--- a/src/content/docs/web-application/how-to/deployment.mdx
+++ b/src/content/docs/web-application/how-to/deployment.mdx
@@ -1,13 +1,13 @@
---
title: "Deployment & Production"
-description: "Deploy html2rss-web with Docker, keep the included-feed path simple, and only enable token-gated feed generation when you are ready to operate it."
+description: "Deploy html2rss-web with Docker, keep the first run small, then add production pieces after the local token-gated flow works."
---
import { Code } from "@astrojs/starlight/components";
import DockerComposeSnippet from "../../../../components/docs/DockerComposeSnippet.astro";
-html2rss-web ships on Docker Hub. Start with the [Getting Started guide](/web-application/getting-started), then add the production pieces below.
+html2rss-web ships on Docker Hub. Start with the [Getting Started guide](/web-application/getting-started) and confirm the local flow works end to end (`docker-compose.yml`, `.env`, your own page URL, access token, generated feed), then add the production pieces below.
The examples use `html2rss/web:1`, the recommended major-version tag. Pin an exact release if your deployment process requires it.
@@ -15,10 +15,10 @@ The examples use `html2rss/web:1`, the recommended major-version tag. Pin an exa
There are two materially different deployment modes:
-- **Included feeds only:** lowest-maintenance path, suitable when the packaged feed set already covers your needs
-- **Included feeds plus automatic generation:** requires `AUTO_SOURCE_ENABLED=true`, bearer-token distribution, and Browserless capacity planning
+- **Automatic generation enabled:** primary self-hosted workflow, requires `AUTO_SOURCE_ENABLED=true` and `HTML2RSS_ACCESS_TOKEN`
+- **Included feeds fallback only:** lower-maintenance path when the packaged feed set already covers your needs
-If you do not need page-URL generation yet, keep `AUTO_SOURCE_ENABLED` off and ship the simpler mode first.
+If you do not need page-URL generation yet, keep `AUTO_SOURCE_ENABLED` off and ship the fallback mode only.
## Prepare for Production
@@ -28,13 +28,12 @@ Before exposing html2rss-web, ensure:
- Inbound TCP ports 80 and 443 reach the host (check firewalls and cloud security groups)
- You are ready to watch the first deployment logs for certificate issuance
- You have a value ready for `HTML2RSS_SECRET_KEY`
-- You have a value ready for `HEALTH_CHECK_TOKEN` if you plan to monitor authenticated `GET /api/v1/health` (the documented Compose stack uses it; `/api/v1/health/live` and `/api/v1/health/ready` do not require it)
+- You have a value ready for `HTML2RSS_ACCESS_TOKEN`
If you plan to enable automatic feed generation, also prepare:
-- `BROWSERLESS_IO_API_TOKEN`
-- Browserless capacity appropriate for the sites you expect to render
-- an operator plan for how users obtain valid bearer tokens
+- a clear way to give users the same `HTML2RSS_ACCESS_TOKEN` your instance expects
+- any optional rendering infrastructure you want to add later for harder sites
### Why a Reverse Proxy?
@@ -52,8 +51,7 @@ Create a `.env` file beside your compose file:
code={`
CADDY_HOST=yourdomain.com
HTML2RSS_SECRET_KEY=
- HEALTH_CHECK_TOKEN=
- BROWSERLESS_IO_API_TOKEN=
+ HTML2RSS_ACCESS_TOKEN=
`}
lang="dotenv"
/>
@@ -62,7 +60,7 @@ Before starting the stack:
- Set `CADDY_HOST` for your domain.
- Generate `HTML2RSS_SECRET_KEY` with `openssl rand -hex 32`.
-- Set a strong `HEALTH_CHECK_TOKEN` when you use authenticated `GET /api/v1/health`; liveness/readiness probes can use `/api/v1/health/live` and `/api/v1/health/ready` without it.
+- Set a strong `HTML2RSS_ACCESS_TOKEN`. This is the token users paste into the web UI.
- Leave `BUILD_TAG` and `GIT_SHA` unset unless you intentionally override image metadata in logs.
- Adjust optional knobs such as `AUTO_SOURCE_ENABLED` and `SENTRY_DSN` as needed; refer to the [environment reference](/web-application/reference/env-variables) for details.
@@ -74,9 +72,9 @@ Re-test after DNS changes with [SSL Labs](https://www.ssllabs.com/ssltest/).
Harden the application before inviting other users:
-- Set a strong `HEALTH_CHECK_TOKEN` for authenticated `GET /api/v1/health`, and separate strong bearer tokens for any protected feeds
+- Set a strong `HTML2RSS_ACCESS_TOKEN` and rotate it when needed
- Prefer environment files (`.env`) stored outside version control for secrets
-- Keep any operator-only token distribution flow outside public docs and outside version control
+- Keep token sharing/distribution outside public docs and outside version control
@@ -86,7 +84,6 @@ Store these variables in a `.env` file and reference it with `env_file:` as demo
Keep the instance healthy once it is in production:
-- Monitor `https://yourdomain.com/api/v1/health` with the configured bearer token for authenticated health checks
- Review `docker compose logs` regularly for feed errors or certificate renewals
- Enable automatic image updates for the Docker tag you selected
- Right-size CPU and memory to avoid starvation when parsing large feeds
@@ -95,7 +92,7 @@ Keep the instance healthy once it is in production:
-This Watchtower shape scopes updates to `html2rss-web`, `browserless`, and `caddy`; change the service names if your stack differs.
+This Watchtower shape scopes updates to `html2rss-web`, `botasaurus`, and `caddy`; change the service names if your stack differs.
Check `docker compose logs watchtower` occasionally to confirm updates are applied.
diff --git a/src/content/docs/web-application/how-to/use-automatic-feed-generation.mdx b/src/content/docs/web-application/how-to/use-automatic-feed-generation.mdx
index c6d7fe5d..a91c824e 100644
--- a/src/content/docs/web-application/how-to/use-automatic-feed-generation.mdx
+++ b/src/content/docs/web-application/how-to/use-automatic-feed-generation.mdx
@@ -5,45 +5,70 @@ description: "Enable the token-gated web UI flow that creates a stable feed from
import { Code } from "@astrojs/starlight/components";
-Automatic feed generation lets `html2rss-web` create a stable feed from a page URL. It is useful when the included config set does not already cover the site you want.
-
-Use this only after you have already verified your instance with an included feed. In production, this feature is disabled by default and should be enabled consciously on your own instance.
+Automatic feed generation lets `html2rss-web` create a stable feed from a page URL. This is the primary onboarding flow for your own instance. Included feeds stay available as a fallback when the packaged catalog already covers the site you want.
## What This Flow Actually Requires
This flow depends on three separate things:
- `AUTO_SOURCE_ENABLED=true` on the server
-- a bearer token that the instance accepts for feed creation
-- Browserless configured if the target page needs JavaScript rendering
+- `HTML2RSS_ACCESS_TOKEN` set on the server
+- the same token pasted into the web UI when prompted
The generated API contract for this flow is published at `/openapi.yaml`.
-## How to Enable It
+## Minimal Local Setup
-Edit your `docker-compose.yml` and enable automatic feed generation:
+Use a minimal `docker-compose.yml`:
-Keep the existing `BROWSERLESS_IO_WEBSOCKET_URL` and `BROWSERLESS_IO_API_TOKEN` settings if you want JavaScript-heavy pages to work reliably.
+Then create `.env` beside it:
+
+
+ HTML2RSS_ACCESS_TOKEN=
+ `}
+ lang="dotenv"
+/>
-Then restart the stack:
+Then start the stack:
## How to Use It
1. Open your instance at `http://localhost:4000`
-2. Paste a page URL into `Create a feed`
-3. Add a valid access token when prompted
-4. Submit the request
-5. Copy the generated feed URL or open it directly
+2. Paste a page URL into `Page URL`
+3. Paste the same `HTML2RSS_ACCESS_TOKEN` from `.env` when prompted
+4. Click `Generate feed URL`
+5. Open the generated feed URL
## What Success Looks Like
@@ -57,11 +82,6 @@ When the flow works, you should see:
That is enough to confirm the self-hosted flow is working.
-## Strategy Behavior
-
-- Feed creation uses the backend default strategy behavior.
-- If feed creation fails, the UI surfaces structured retry/error guidance rather than exposing low-level strategy controls.
-
## Input URL Guidance (Quality First)
Automatic generation is most successful when the input URL is already a listing/update surface.
@@ -79,34 +99,19 @@ If output quality is poor, switch the input to a direct listing/update URL befor
## Failure Meanings You May See
-The backend runtime classifies common extraction failures with clearer intent:
-
-- blocked/interstitial surface likely
-- app-shell surface likely
-- unsupported extraction surface for auto mode
-
-In the current web product flow, these categories are mostly internal/operator-level signals (runtime/logging). They are not guaranteed to appear as labeled categories in the UI.
-
-What users typically see today:
-
-- feed-creation API errors (for example auth/URL/unsupported strategy)
-- preview-level fallback text such as `Preview unavailable right now.`
-- feed render error payloads when opening feed URLs directly
-
-## Browserless Troubleshooting In `html2rss-web`
-
-If Browserless-backed attempts fail:
+What users typically see:
-- verify the Browserless container/service is running
-- verify `BROWSERLESS_IO_WEBSOCKET_URL` is reachable from the web container
-- verify `BROWSERLESS_IO_API_TOKEN` matches the Browserless `TOKEN`
+- access token rejected
+- invalid or unsupported input URL
+- preview unavailable right now
+- a feed render error when opening the generated feed URL directly
-For local Compose-based setups, check container health/logs with:
+For local setup problems, check container state and logs:
diff --git a/src/content/docs/web-application/how-to/use-included-configs.mdx b/src/content/docs/web-application/how-to/use-included-configs.mdx
index d6143b05..7141448b 100644
--- a/src/content/docs/web-application/how-to/use-included-configs.mdx
+++ b/src/content/docs/web-application/how-to/use-included-configs.mdx
@@ -1,9 +1,9 @@
---
title: "Use the included configs"
-description: "Use the embedded html2rss-configs feed set from your own html2rss-web instance."
+description: "Use the embedded html2rss-configs feed set from your own html2rss-web instance when you want the packaged fallback path."
---
-`html2rss-web` can serve the embedded `html2rss-configs` feed set directly from your own instance. This is the fastest path when you want a working feed without writing YAML first.
+`html2rss-web` can serve the embedded `html2rss-configs` feed set directly from your own instance. Use this when the packaged catalog already covers your site or when you want a fallback path without generating a new feed first.
## How to Use Them
@@ -39,5 +39,5 @@ Use a custom config when:
Next steps:
-- [Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/) when you want to create feeds directly from page URLs
+- [Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/) for the main page-URL workflow on your own instance
- [Create Custom Feeds](/creating-custom-feeds) when you need a reviewable config
diff --git a/src/content/docs/web-application/index.mdx b/src/content/docs/web-application/index.mdx
index a69ecebd..7ec08970 100644
--- a/src/content/docs/web-application/index.mdx
+++ b/src/content/docs/web-application/index.mdx
@@ -1,26 +1,27 @@
---
title: "Web Application"
-description: "html2rss-web is the self-hosted web interface and feed server for included feeds, direct generation, and custom configs."
+description: "html2rss-web is the self-hosted web interface and feed server for page-URL generation, included-feed fallback, and custom configs."
sidebar:
label: "Overview"
order: 1
---
-`html2rss-web` is the recommended way to get started. Run it locally with Docker, verify one feed from your own instance, then decide whether you need token-gated direct generation or custom configs.
+`html2rss-web` is the recommended way to get started. Run it locally with Docker, set your access token (`HTML2RSS_ACCESS_TOKEN`) in `.env`, create your first generated feed, then decide whether you also want the included-feed fallback or custom configs.
## Get Started
Start with **[Getting Started](/web-application/getting-started)** to:
- run your own local instance
-- verify the web interface
+- paste your access token into the web UI
+- open a generated feed from your own instance
- choose the right next step for your site
## What The Web App Gives You
-- **Included feed catalog:** real embedded configs you can use immediately from your own deployment
-- **Web interface:** direct feed creation when you explicitly enable it
-- **Access-controlled generation:** `POST /api/v1/feeds` requires a bearer token
+- **Web interface:** direct feed creation from page URLs
+- **Access-controlled generation:** `POST /api/v1/feeds` requires a bearer token that matches the access token configured on your instance (`HTML2RSS_ACCESS_TOKEN`)
+- **Included feed catalog:** packaged fallback feeds you can use immediately from your own deployment
- **Config-based extension path:** move to custom feeds when you need reviewable rules
- **Caching and HTTP handling:** shipped as part of the deployment
- **Generated API contract:** OpenAPI is published at `/openapi.yaml`
@@ -30,8 +31,8 @@ The scraping and feed-building engine is provided by the Ruby gem [`html2rss`](h
## Recommended Flow
1. **[Getting Started](/web-application/getting-started)**: run the app locally
-2. **[Use the included configs](/web-application/how-to/use-included-configs/)**: use the embedded feed set when it covers your site
-3. **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)**: enable direct page-URL conversion when you want that workflow
+2. **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)**: keep the page-URL flow working on your own instance
+3. **[Use the included configs](/web-application/how-to/use-included-configs/)**: use the embedded feed set as fallback when it covers your site
4. **[Create Custom Feeds](/creating-custom-feeds)**: build a stable custom setup when needed
## For Integrations
diff --git a/src/content/docs/web-application/reference/env-variables.mdx b/src/content/docs/web-application/reference/env-variables.mdx
index dd1944b1..71a39e59 100644
--- a/src/content/docs/web-application/reference/env-variables.mdx
+++ b/src/content/docs/web-application/reference/env-variables.mdx
@@ -8,12 +8,13 @@ description: "Configuration reference for html2rss-web environment variables."
| Name | Description |
| --------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `HTML2RSS_SECRET_KEY` | required in production; development/test gets a temporary default |
+| `HTML2RSS_ACCESS_TOKEN` | token for the main feed-creation flow; paste the same value into the web UI when prompted |
| `HEALTH_CHECK_TOKEN` | bearer token for authenticated `GET /api/v1/health`; optional unless you use that endpoint (the documented Compose stack includes it); `/api/v1/health/live` and `/api/v1/health/ready` do not require it |
| `BUILD_TAG` | release metadata used in logs; published Docker images set this to the release version |
| `GIT_SHA` | deployed commit metadata used in logs; published Docker images set this to the released commit |
| `SENTRY_DSN` | optional; enables Sentry errors/logs when set |
-| `BROWSERLESS_IO_WEBSOCKET_URL` | optional; Browserless websocket endpoint for `browserless` strategy |
-| `BROWSERLESS_IO_API_TOKEN` | required by this site's Compose stack and by custom websocket endpoints; standalone `html2rss` local defaults can omit it |
+| `BROWSERLESS_IO_WEBSOCKET_URL` | optional; Browserless websocket endpoint for harder sites when you add Browserless later |
+| `BROWSERLESS_IO_API_TOKEN` | optional unless you explicitly add Browserless to your stack or point at a custom Browserless endpoint |
| `AUTO_SOURCE_ENABLED` | `true` by default in development/test, `false` otherwise |
| `ASYNC_FEED_REFRESH_ENABLED` | optional boolean; default `false` |
| `ASYNC_FEED_REFRESH_STALE_FACTOR` | optional integer `>= 1`; default `3` |