Skip to content

Commit a057383

Browse files
committed
feat: add /health endpoint with Docker HEALTHCHECK
Add an HTTP health server that reports upstream Modbus connectivity status, enabling Docker to monitor container health without external tools in the scratch-based image. The health server listens on :8080 (configurable via HEALTH_LISTEN) and exposes GET /health, which returns 200 when upstream is reachable or 503 with error details when it is not. Upstream status is tracked passively from real request results to avoid additional load on the Modbus device. The binary's -health flag acts as an HTTP client for the HEALTHCHECK instruction, since scratch has no curl/wget. Bind failures on the health port are treated as fatal to prevent Docker from killing an otherwise-healthy container due to unreachable health checks.
1 parent 52606bd commit a057383

File tree

8 files changed

+387
-4
lines changed

8 files changed

+387
-4
lines changed

Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,9 @@ FROM scratch
3131

3232
COPY --from=builder /app/mbproxy /mbproxy
3333

34+
EXPOSE 8080
35+
36+
HEALTHCHECK --interval=5s --timeout=3s --start-period=10s --retries=3 \
37+
CMD ["/mbproxy", "-health"]
38+
3439
ENTRYPOINT ["/mbproxy"]

cmd/mbproxy/main.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,33 @@ package main
22

33
import (
44
"context"
5+
"flag"
6+
"fmt"
57
"log/slog"
68
"os"
79
"os/signal"
810
"syscall"
11+
"time"
912

1013
"github.com/tma/mbproxy/internal/config"
14+
"github.com/tma/mbproxy/internal/health"
1115
"github.com/tma/mbproxy/internal/logging"
1216
"github.com/tma/mbproxy/internal/proxy"
1317
)
1418

1519
func main() {
20+
healthCheck := flag.Bool("health", false, "run health check and exit")
21+
flag.Parse()
22+
23+
if *healthCheck {
24+
addr := config.GetEnv("HEALTH_LISTEN", ":8080")
25+
if err := health.CheckHealth(addr); err != nil {
26+
fmt.Fprintln(os.Stderr, err)
27+
os.Exit(1)
28+
}
29+
return
30+
}
31+
1632
cfg, err := config.Load()
1733
if err != nil {
1834
slog.Error("failed to load configuration", "error", err)
@@ -34,6 +50,19 @@ func main() {
3450
os.Exit(1)
3551
}
3652

53+
// Start health server
54+
hs := health.NewServer(cfg.HealthListen, p, logger)
55+
hsLn, err := hs.Listen()
56+
if err != nil {
57+
logger.Error("failed to start health server", "error", err)
58+
os.Exit(1)
59+
}
60+
go func() {
61+
if err := hs.Serve(hsLn); err != nil {
62+
logger.Error("health server error", "error", err)
63+
}
64+
}()
65+
3766
// Start proxy in background
3867
errCh := make(chan error, 1)
3968
go func() {
@@ -53,6 +82,13 @@ func main() {
5382

5483
// Graceful shutdown
5584
cancel()
85+
86+
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second)
87+
defer shutdownCancel()
88+
if err := hs.Shutdown(shutdownCtx); err != nil {
89+
logger.Error("health server shutdown error", "error", err)
90+
}
91+
5692
if err := p.Shutdown(cfg.ShutdownTimeout); err != nil {
5793
logger.Error("shutdown error", "error", err)
5894
os.Exit(1)

internal/config/config.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,14 @@ type Config struct {
3030
RequestDelay time.Duration
3131
ConnectDelay time.Duration
3232
ShutdownTimeout time.Duration
33+
HealthListen string
3334
LogLevel string
3435
}
3536

3637
// Load reads configuration from environment variables.
3738
func Load() (*Config, error) {
3839
cfg := &Config{
39-
Listen: getEnv("MODBUS_LISTEN", ":5502"),
40+
Listen: GetEnv("MODBUS_LISTEN", ":5502"),
4041
Upstream: os.Getenv("MODBUS_UPSTREAM"),
4142
DefaultSlaveID: 1,
4243
CacheTTL: 10 * time.Second,
@@ -46,7 +47,8 @@ func Load() (*Config, error) {
4647
RequestDelay: 0,
4748
ConnectDelay: 0,
4849
ShutdownTimeout: 30 * time.Second,
49-
LogLevel: getEnv("LOG_LEVEL", "INFO"),
50+
HealthListen: GetEnv("HEALTH_LISTEN", ":8080"),
51+
LogLevel: GetEnv("LOG_LEVEL", "INFO"),
5052
}
5153

5254
if cfg.Upstream == "" {
@@ -129,7 +131,9 @@ func Load() (*Config, error) {
129131
return cfg, nil
130132
}
131133

132-
func getEnv(key, defaultValue string) string {
134+
// GetEnv returns the value of the environment variable named by key,
135+
// or defaultValue if the variable is unset or set to the empty string.
136+
func GetEnv(key, defaultValue string) string {
133137
if v := os.Getenv(key); v != "" {
134138
return v
135139
}

internal/config/config_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ func TestLoad_Defaults(t *testing.T) {
1919
os.Unsetenv("MODBUS_READONLY")
2020
os.Unsetenv("MODBUS_TIMEOUT")
2121
os.Unsetenv("MODBUS_SHUTDOWN_TIMEOUT")
22+
os.Unsetenv("HEALTH_LISTEN")
2223
os.Unsetenv("LOG_LEVEL")
2324

2425
cfg, err := Load()
@@ -56,6 +57,9 @@ func TestLoad_Defaults(t *testing.T) {
5657
if cfg.ShutdownTimeout != 30*time.Second {
5758
t.Errorf("expected 30s shutdown timeout, got %v", cfg.ShutdownTimeout)
5859
}
60+
if cfg.HealthListen != ":8080" {
61+
t.Errorf("expected :8080, got %s", cfg.HealthListen)
62+
}
5963
if cfg.LogLevel != "INFO" {
6064
t.Errorf("expected INFO log level, got %s", cfg.LogLevel)
6165
}
@@ -81,6 +85,7 @@ func TestLoad_CustomValues(t *testing.T) {
8185
os.Setenv("MODBUS_REQUEST_DELAY", "100ms")
8286
os.Setenv("MODBUS_CONNECT_DELAY", "200ms")
8387
os.Setenv("MODBUS_SHUTDOWN_TIMEOUT", "60s")
88+
os.Setenv("HEALTH_LISTEN", ":9090")
8489
os.Setenv("LOG_LEVEL", "DEBUG")
8590

8691
defer func() {
@@ -94,6 +99,7 @@ func TestLoad_CustomValues(t *testing.T) {
9499
os.Unsetenv("MODBUS_REQUEST_DELAY")
95100
os.Unsetenv("MODBUS_CONNECT_DELAY")
96101
os.Unsetenv("MODBUS_SHUTDOWN_TIMEOUT")
102+
os.Unsetenv("HEALTH_LISTEN")
97103
os.Unsetenv("LOG_LEVEL")
98104
}()
99105

@@ -129,6 +135,9 @@ func TestLoad_CustomValues(t *testing.T) {
129135
if cfg.ShutdownTimeout != 60*time.Second {
130136
t.Errorf("expected 60s shutdown timeout, got %v", cfg.ShutdownTimeout)
131137
}
138+
if cfg.HealthListen != ":9090" {
139+
t.Errorf("expected :9090, got %s", cfg.HealthListen)
140+
}
132141
if cfg.LogLevel != "DEBUG" {
133142
t.Errorf("expected DEBUG log level, got %s", cfg.LogLevel)
134143
}

internal/health/health.go

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// Package health provides an HTTP health check server.
2+
package health
3+
4+
import (
5+
"context"
6+
"encoding/json"
7+
"fmt"
8+
"log/slog"
9+
"net"
10+
"net/http"
11+
"time"
12+
)
13+
14+
// Checker reports whether a component is healthy.
//
// Healthy returns nil when the component is healthy. The /health handler in
// this package treats any non-nil error as unhealthy and includes the error's
// message in the response body.
15+
type Checker interface {
16+
Healthy() error
17+
}
18+
19+
// Response is the JSON body returned by the health endpoint.
//
// The handler in this package sets Status to "ok" on success and to
// "unhealthy" on failure. Error carries the checker's error message and is
// omitted from the JSON output when empty.
20+
type Response struct {
21+
Status string `json:"status"`
22+
Error string `json:"error,omitempty"`
23+
}
24+
25+
// Server is a lightweight HTTP server that exposes a /health endpoint.
//
// Construct it with NewServer; the zero value has no underlying HTTP server
// or logger and is not usable.
26+
type Server struct {
27+
// httpServer is the underlying net/http server that owns the listen
// address and the request mux.
httpServer *http.Server
28+
// logger receives the "listening" message and response-encoding errors.
logger *slog.Logger
29+
}
30+
31+
// NewServer creates a new health check server.
32+
// The checker is called on each request to determine upstream health.
33+
func NewServer(addr string, checker Checker, logger *slog.Logger) *Server {
34+
mux := http.NewServeMux()
35+
s := &Server{
36+
httpServer: &http.Server{
37+
Addr: addr,
38+
Handler: mux,
39+
ReadHeaderTimeout: 5 * time.Second,
40+
},
41+
logger: logger,
42+
}
43+
44+
mux.HandleFunc("/health", s.handleHealth(checker))
45+
46+
return s
47+
}
48+
49+
func (s *Server) handleHealth(checker Checker) http.HandlerFunc {
50+
return func(w http.ResponseWriter, r *http.Request) {
51+
w.Header().Set("Content-Type", "application/json")
52+
53+
if err := checker.Healthy(); err != nil {
54+
w.WriteHeader(http.StatusServiceUnavailable)
55+
resp := Response{Status: "unhealthy", Error: err.Error()}
56+
if encErr := json.NewEncoder(w).Encode(resp); encErr != nil {
57+
s.logger.Error("failed to encode health response", "error", encErr)
58+
}
59+
return
60+
}
61+
62+
w.WriteHeader(http.StatusOK)
63+
resp := Response{Status: "ok"}
64+
if err := json.NewEncoder(w).Encode(resp); err != nil {
65+
s.logger.Error("failed to encode health response", "error", err)
66+
}
67+
}
68+
}
69+
70+
// ListenAndServe starts the health server. It blocks until the server
71+
// is shut down or encounters a fatal error. Use Listen + Serve to
72+
// separate binding from serving, which allows detecting bind errors early.
73+
func (s *Server) ListenAndServe() error {
74+
s.logger.Info("health server listening", "addr", s.httpServer.Addr)
75+
err := s.httpServer.ListenAndServe()
76+
if err == http.ErrServerClosed {
77+
return nil
78+
}
79+
return err
80+
}
81+
82+
// Listen binds the server to its configured address. Call Serve to
83+
// start accepting connections after Listen returns successfully.
84+
func (s *Server) Listen() (net.Listener, error) {
85+
ln, err := net.Listen("tcp", s.httpServer.Addr)
86+
if err != nil {
87+
return nil, err
88+
}
89+
s.logger.Info("health server listening", "addr", ln.Addr())
90+
return ln, nil
91+
}
92+
93+
// Serve accepts connections on the given listener. It blocks until the
94+
// server is shut down or encounters a fatal error.
95+
func (s *Server) Serve(ln net.Listener) error {
96+
err := s.httpServer.Serve(ln)
97+
if err == http.ErrServerClosed {
98+
return nil
99+
}
100+
return err
101+
}
102+
103+
// Shutdown gracefully shuts down the health server.
//
// It delegates to http.Server.Shutdown with ctx and returns that call's
// error unchanged, so ctx bounds how long the shutdown may take.
104+
func (s *Server) Shutdown(ctx context.Context) error {
105+
return s.httpServer.Shutdown(ctx)
106+
}
107+
108+
// CheckHealth performs an HTTP health check against the given address.
109+
// It returns nil if the endpoint responds with 200 OK.
110+
func CheckHealth(addr string) error {
111+
// Resolve the address so we can build a proper URL.
112+
host, port, err := net.SplitHostPort(addr)
113+
if err != nil {
114+
return fmt.Errorf("invalid address %q: %w", addr, err)
115+
}
116+
if host == "" {
117+
host = "localhost"
118+
}
119+
120+
url := fmt.Sprintf("http://%s:%s/health", host, port)
121+
122+
client := &http.Client{Timeout: 3 * time.Second}
123+
resp, err := client.Get(url)
124+
if err != nil {
125+
return fmt.Errorf("health check request failed: %w", err)
126+
}
127+
defer resp.Body.Close()
128+
129+
if resp.StatusCode != http.StatusOK {
130+
return fmt.Errorf("health check returned status %d", resp.StatusCode)
131+
}
132+
return nil
133+
}

0 commit comments

Comments
 (0)