Skip to content

Commit 1c61daa

Browse files
wesmclaude
andauthored
Add TUI control socket, --no-quit flag, and runtime metadata (#488)
## Summary - Add a newline-delimited JSON protocol over a Unix domain socket (`tui.{PID}.sock`) that allows external tools to programmatically query and mutate running TUI instances - Support 13 commands: 4 queries (`get-state`, `get-filter`, `get-jobs`, `get-selected`) and 9 mutations (`set-filter`, `clear-filter`, `set-hide-closed`, `select-job`, `set-view`, `close-review`, `cancel-job`, `rerun-job`, `quit`) - Write runtime metadata to `tui.{PID}.json` (PID, socket path, server address) for discoverability; refresh on daemon reconnect when server address changes - Add `--control-socket` flag for custom socket paths and `--no-quit` flag that suppresses keyboard quit (`q`) in queue/tasks views for managed TUI instances - `quit` control command terminates the TUI regardless of `--no-quit`, allowing external controllers to manage lifecycle programmatically - `set-filter` resolves display names (e.g. "msgvault") to root paths via `/api/repos`, matching the interactive filter modal's resolution; `reposMsg` carries an explicit `branchFiltered` flag so branch-scoped loads don't clobber the display-name map - Help text conditionally omits `q`/quit references when `--no-quit` is active - Socket security: stale sockets from dead processes cleaned via platform-specific PID checks (signal-0 on Unix, tasklist on Windows); socket created with `0600` permissions via `os.Chmod` (not process-wide umask); managed socket directory tightened to `0700`; custom `--control-socket` paths skip directory chmod to avoid clobbering shared directories like `/tmp` - Startup safety: control listener deferred until Bubble Tea event loop is running (`ready` channel); `controlSocketReadyMsg` prevents advertising a socket path before the listener binds; `cleanupDone` channel ensures socket removal completes before `Run()` returns; `programDone` channel prevents deadlock if program exits before first `Update` - Cleanup ordering: socket file removed before listener close to prevent a successor process's socket from being unlinked by a dying predecessor - Filter mutations (`set-filter`, `clear-filter`) validate all lock constraints before mutating any state to prevent partial application - `rerun-job` clears `Closed`/`Verdict` fields so rerun jobs remain visible under `hideClosed`; both the control socket and keyboard rerun paths use a shared `rerunSnapshot` for rollback on failure - `select-job` rejects jobs hidden by active filters; `close-review`/`cancel-job` only reflow selection when the mutated job is the currently selected row 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9f29f60 commit 1c61daa

24 files changed

+3062
-47
lines changed

AGENTS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ CLI (roborev) -> HTTP API -> Daemon -> Worker Pool -> Agent adapters
138138

139139
## Testing
140140

141+
Use `testify` (`github.com/stretchr/testify`) for all test assertions. Use `require.*` for fatal preconditions and `assert.*` for non-fatal checks. Do not use raw `if`/`t.Errorf`/`t.Fatalf` patterns.
142+
141143
- After any Go code changes, run `go fmt ./...` and `go vet ./...` before committing.
142144
- Fast test pass: `go test ./...`
143145
- Integration tests: `go test -tags=integration ./...`

CLAUDE.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,9 @@ roborev tui # Terminal UI
341341
- `test` agent is always available (no PATH lookup)
342342
- Worker pool test hooks enable deterministic synchronization
343343
- Table-driven tests are preferred
344+
- Use `testify` (`assert`/`require`) for all test assertions -- do not use raw `if`/`t.Errorf`/`t.Fatalf` patterns
345+
- `require.*` for fatal preconditions (test cannot continue if this fails), `assert.*` for non-fatal checks
346+
- In tests with more than three assertions, prefer `assert := assert.New(t)` shorthand
344347

345348
## Conventions
346349

cmd/roborev/tui/action_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ func TestTUICancelJobSuccess(t *testing.T) {
512512
}
513513
_, m := mockServerModel(t, expectJSONPost(t, "/api/job/cancel", cancelRequest{JobID: 42}, map[string]any{"success": true}))
514514
oldFinishedAt := time.Now().Add(-1 * time.Hour)
515-
cmd := m.cancelJob(42, storage.JobStatusRunning, &oldFinishedAt)
515+
cmd := m.cancelJob(42, storage.JobStatusRunning, &oldFinishedAt, false)
516516
msg := cmd()
517517

518518
result := assertMsgType[cancelResultMsg](t, msg)
@@ -530,7 +530,7 @@ func TestTUICancelJobNotFound(t *testing.T) {
530530
w.WriteHeader(http.StatusNotFound)
531531
json.NewEncoder(w).Encode(map[string]string{"error": "not found"})
532532
})
533-
cmd := m.cancelJob(99, storage.JobStatusQueued, nil)
533+
cmd := m.cancelJob(99, storage.JobStatusQueued, nil, false)
534534
msg := cmd()
535535

536536
result := assertMsgType[cancelResultMsg](t, msg)

cmd/roborev/tui/actions.go

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -144,22 +144,60 @@ func (m model) markParentClosed(parentJobID int64) tea.Cmd {
144144
}
145145
}
146146

147-
// cancelJob sends a cancel request to the server
148-
func (m model) cancelJob(jobID int64, oldStatus storage.JobStatus, oldFinishedAt *time.Time) tea.Cmd {
147+
// cancelJob sends a cancel request to the server.
148+
// restoreSelection tells the result handler to reselect the job
149+
// if the request fails and the optimistic update is rolled back.
150+
func (m model) cancelJob(
151+
jobID int64, oldStatus storage.JobStatus,
152+
oldFinishedAt *time.Time, restoreSelection bool,
153+
) tea.Cmd {
149154
return func() tea.Msg {
150-
err := m.postJSON("/api/job/cancel", map[string]any{"job_id": jobID}, nil)
151-
return cancelResultMsg{jobID: jobID, oldState: oldStatus, oldFinishedAt: oldFinishedAt, err: err}
155+
err := m.postJSON(
156+
"/api/job/cancel",
157+
map[string]any{"job_id": jobID}, nil,
158+
)
159+
return cancelResultMsg{
160+
jobID: jobID,
161+
oldState: oldStatus,
162+
oldFinishedAt: oldFinishedAt,
163+
restoreSelection: restoreSelection,
164+
err: err,
165+
}
152166
}
153167
}
154168

155-
// rerunJob sends a rerun request to the server for failed/canceled jobs
156-
func (m model) rerunJob(jobID int64, oldStatus storage.JobStatus, oldStartedAt, oldFinishedAt *time.Time, oldError string) tea.Cmd {
169+
// rerunJob sends a rerun request to the server for failed/canceled jobs.
170+
func (m model) rerunJob(snap rerunSnapshot) tea.Cmd {
157171
return func() tea.Msg {
158-
err := m.postJSON("/api/job/rerun", map[string]any{"job_id": jobID}, nil)
159-
return rerunResultMsg{jobID: jobID, oldState: oldStatus, oldStartedAt: oldStartedAt, oldFinishedAt: oldFinishedAt, oldError: oldError, err: err}
172+
err := m.postJSON(
173+
"/api/job/rerun",
174+
map[string]any{"job_id": snap.jobID}, nil,
175+
)
176+
return rerunResultMsg{
177+
jobID: snap.jobID,
178+
oldState: snap.oldStatus,
179+
oldStartedAt: snap.oldStartedAt,
180+
oldFinishedAt: snap.oldFinishedAt,
181+
oldError: snap.oldError,
182+
oldClosed: snap.oldClosed,
183+
oldVerdict: snap.oldVerdict,
184+
err: err,
185+
}
160186
}
161187
}
162188

189+
// rerunSnapshot captures job state before an optimistic rerun
190+
// update so it can be rolled back if the server request fails.
191+
type rerunSnapshot struct {
192+
jobID int64
193+
oldStatus storage.JobStatus
194+
oldStartedAt *time.Time
195+
oldFinishedAt *time.Time
196+
oldError string
197+
oldClosed *bool
198+
oldVerdict *string
199+
}
200+
163201
func (m model) submitComment(jobID int64, text string) tea.Cmd {
164202
return func() tea.Msg {
165203
commenter := os.Getenv("USER")

cmd/roborev/tui/control.go

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
package tui
2+
3+
import (
4+
"bufio"
5+
"context"
6+
"encoding/json"
7+
"errors"
8+
"fmt"
9+
"log"
10+
"net"
11+
"os"
12+
"path/filepath"
13+
"syscall"
14+
"time"
15+
16+
tea "github.com/charmbracelet/bubbletea"
17+
)
18+
19+
const controlResponseTimeout = 3 * time.Second
20+
21+
// removeStaleSocket checks whether socketPath is a leftover Unix
22+
// socket from a previous crash and removes it. Returns an error if
23+
// the path exists but is not a socket (protecting regular files from
24+
// accidental deletion via a mistyped --control-socket).
25+
func removeStaleSocket(socketPath string) error {
26+
fi, err := os.Lstat(socketPath)
27+
if os.IsNotExist(err) {
28+
return nil
29+
}
30+
if err != nil {
31+
return fmt.Errorf("stat socket path: %w", err)
32+
}
33+
if fi.Mode().Type()&os.ModeSocket == 0 {
34+
return fmt.Errorf(
35+
"%s already exists and is not a socket", socketPath,
36+
)
37+
}
38+
// It's a socket — try to connect to see if it's still live.
39+
conn, dialErr := net.DialTimeout("unix", socketPath, 500*time.Millisecond)
40+
if dialErr == nil {
41+
conn.Close()
42+
return fmt.Errorf(
43+
"%s is already in use by another listener", socketPath,
44+
)
45+
}
46+
// Only treat ECONNREFUSED as proof that nothing is listening.
47+
// Other dial errors (wrong socket type, permission denied, etc.)
48+
// are ambiguous and could indicate a live non-stream socket.
49+
if !isConnRefused(dialErr) {
50+
return fmt.Errorf(
51+
"%s: cannot determine socket state: %w",
52+
socketPath, dialErr,
53+
)
54+
}
55+
// ECONNREFUSED — nothing is listening. Safe to remove.
56+
if err := os.Remove(socketPath); err != nil {
57+
return fmt.Errorf("remove stale socket: %w", err)
58+
}
59+
return nil
60+
}
61+
62+
// isConnRefused returns true when the error chain contains
63+
// ECONNREFUSED, which means a socket exists but nothing is listening.
64+
func isConnRefused(err error) bool {
65+
return errors.Is(err, syscall.ECONNREFUSED)
66+
}
67+
68+
// startControlListener creates a Unix domain socket and starts
69+
// accepting connections. Each connection receives one JSON command,
70+
// dispatches it through the tea.Program, and returns a JSON response.
71+
// Returns a cleanup function that closes the listener and removes
72+
// the socket file.
73+
// ensureSocketDir creates the socket parent directory with
74+
// owner-only permissions. MkdirAll is a no-op on existing
75+
// directories, so we always chmod explicitly to tighten
76+
// pre-existing dirs (e.g. ~/.roborev created with 0755).
77+
// This must only be called for managed directories (the
78+
// default data dir), never for arbitrary user-supplied paths.
79+
func ensureSocketDir(dir string) error {
80+
if err := os.MkdirAll(dir, 0700); err != nil {
81+
return fmt.Errorf("create socket directory: %w", err)
82+
}
83+
if err := os.Chmod(dir, 0700); err != nil {
84+
return fmt.Errorf("chmod socket directory: %w", err)
85+
}
86+
return nil
87+
}
88+
89+
func startControlListener(
90+
socketPath string, p *tea.Program,
91+
) (func(), error) {
92+
// Ensure the parent directory exists for custom socket paths.
93+
// Permission tightening is handled separately by ensureSocketDir
94+
// for the default managed directory only.
95+
if err := os.MkdirAll(filepath.Dir(socketPath), 0755); err != nil {
96+
return nil, fmt.Errorf("create socket directory: %w", err)
97+
}
98+
99+
// Only remove an existing path if it is a stale Unix socket.
100+
// Refusing to remove regular files prevents data loss from
101+
// a mistyped --control-socket path.
102+
if err := removeStaleSocket(socketPath); err != nil {
103+
return nil, err
104+
}
105+
106+
ln, err := net.Listen("unix", socketPath)
107+
if err != nil {
108+
return nil, fmt.Errorf("listen on %s: %w", socketPath, err)
109+
}
110+
111+
// Restrict socket permissions to owner-only.
112+
if err := os.Chmod(socketPath, 0600); err != nil {
113+
ln.Close()
114+
return nil, fmt.Errorf("chmod socket: %w", err)
115+
}
116+
117+
ctx, cancel := context.WithCancel(context.Background())
118+
119+
go acceptLoop(ctx, ln, p)
120+
121+
// Disable automatic unlinking so ln.Close() does not remove
122+
// whatever is at socketPath — a successor process may have
123+
// already bound a new socket there.
124+
ln.(*net.UnixListener).SetUnlinkOnClose(false)
125+
126+
cleanup := func() {
127+
cancel()
128+
os.Remove(socketPath)
129+
ln.Close()
130+
}
131+
return cleanup, nil
132+
}
133+
134+
func acceptLoop(ctx context.Context, ln net.Listener, p *tea.Program) {
135+
for {
136+
conn, err := ln.Accept()
137+
if err != nil {
138+
// Check if listener was closed (normal shutdown)
139+
select {
140+
case <-ctx.Done():
141+
return
142+
default:
143+
}
144+
log.Printf("control: accept error: %v", err)
145+
// Back off on transient errors (e.g. EMFILE) to
146+
// avoid a tight CPU-pegging loop.
147+
time.Sleep(100 * time.Millisecond)
148+
continue
149+
}
150+
go handleControlConn(ctx, conn, p)
151+
}
152+
}
153+
154+
func handleControlConn(
155+
ctx context.Context, conn net.Conn, p *tea.Program,
156+
) {
157+
defer conn.Close()
158+
159+
// Set read deadline to prevent hung connections
160+
if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
161+
return
162+
}
163+
164+
scanner := bufio.NewScanner(conn)
165+
scanner.Buffer(make([]byte, 64*1024), 64*1024)
166+
if !scanner.Scan() {
167+
writeError(conn, "empty request")
168+
return
169+
}
170+
171+
var req controlRequest
172+
if err := json.Unmarshal(scanner.Bytes(), &req); err != nil {
173+
writeError(conn, "invalid JSON: "+err.Error())
174+
return
175+
}
176+
177+
resp := dispatchCommand(ctx, req, p)
178+
writeResponse(conn, resp)
179+
}
180+
181+
func dispatchCommand(
182+
ctx context.Context, req controlRequest, p *tea.Program,
183+
) controlResponse {
184+
isQuery, isMutation := isControlCommand(req.Command)
185+
186+
switch {
187+
case isQuery:
188+
return queryViaProgram(ctx, p, req)
189+
case isMutation:
190+
return mutateViaProgram(ctx, p, req)
191+
default:
192+
return controlResponse{
193+
Error: fmt.Sprintf("unknown command: %s", req.Command),
194+
}
195+
}
196+
}
197+
198+
// queryViaProgram sends a controlQueryMsg through the program and
199+
// waits for the Update handler to write the response. The control
200+
// listener is only started after the event loop is running (via the
201+
// ready channel in Run), so p.Send will not block here.
202+
func queryViaProgram(
203+
ctx context.Context, p *tea.Program, req controlRequest,
204+
) controlResponse {
205+
respCh := make(chan controlResponse, 1)
206+
p.Send(controlQueryMsg{req: req, respCh: respCh})
207+
208+
select {
209+
case resp := <-respCh:
210+
return resp
211+
case <-ctx.Done():
212+
return controlResponse{Error: "TUI is shutting down"}
213+
case <-time.After(controlResponseTimeout):
214+
return controlResponse{Error: "response timeout"}
215+
}
216+
}
217+
218+
// mutateViaProgram sends a controlMutationMsg through the program
219+
// and waits for the Update handler to write the response.
220+
func mutateViaProgram(
221+
ctx context.Context, p *tea.Program, req controlRequest,
222+
) controlResponse {
223+
respCh := make(chan controlResponse, 1)
224+
p.Send(controlMutationMsg{req: req, respCh: respCh})
225+
226+
select {
227+
case resp := <-respCh:
228+
return resp
229+
case <-ctx.Done():
230+
return controlResponse{Error: "TUI is shutting down"}
231+
case <-time.After(controlResponseTimeout):
232+
return controlResponse{Error: "response timeout"}
233+
}
234+
}
235+
236+
func writeResponse(conn net.Conn, resp controlResponse) {
237+
data, err := json.Marshal(resp)
238+
if err != nil {
239+
writeError(conn, "marshal error: "+err.Error())
240+
return
241+
}
242+
data = append(data, '\n')
243+
_, _ = conn.Write(data)
244+
}
245+
246+
func writeError(conn net.Conn, msg string) {
247+
resp := controlResponse{Error: msg}
248+
data, _ := json.Marshal(resp)
249+
data = append(data, '\n')
250+
_, _ = conn.Write(data)
251+
}

0 commit comments

Comments
 (0)