Skip to content

Commit ea6aac9

Browse files
osortega and Copilot authored
Agents tunnels: auto-reconnect with backoff and wake-triggered retry (#310868)
* Agents tunnels: auto-reconnect with backoff and wake-triggered retry

Tunnel-backed remote agent hosts previously had no auto-reconnect behavior — on laptop sleep / network drop the tunnel would flip to Disconnected and stay there until the user manually retried. This adds a reconnect loop inside TunnelAgentHostContribution:

- Detect Connected→Disconnected transitions for still-cached tunnels and schedule an immediate reconnect. Only fires when the entry is explicitly Disconnected — if the entry has been removed (e.g. user clicked "Remove Remote"), we honour the removal and do not reconnect.
- Exponential backoff on consecutive failures: 1s → 30s cap, up to 10 attempts, then pause.
- Wake-triggered retry: on browser `online` or tab `visibilitychange` → visible, resume any paused reconnects. Rate-limited to one resume per 10s so rapid tab toggling can't hammer a permanently broken endpoint with unbounded attempt bursts.
- Prune all reconnect state when a tunnel is uncached or the contribution is disposed.

* Review comment

Co-authored-by: Copilot <copilot@github.com>

* Telemetry

Co-authored-by: Copilot <copilot@github.com>

* Clean up

Co-authored-by: Copilot <copilot@github.com>

---------

Co-authored-by: Copilot <copilot@github.com>
1 parent 99c9ee1 commit ea6aac9

File tree

2 files changed

+459
-0
lines changed

2 files changed

+459
-0
lines changed

src/vs/sessions/common/sessionsTelemetry.ts

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,64 @@ type ChangesViewReviewCommentAddedClassification = {
111111
/**
 * Publishes the `vscodeAgents.changesView/reviewCommentAdded` telemetry event.
 *
 * @param telemetryService Service the event is published through.
 * @param data Event payload; it is forwarded to the telemetry service as-is.
 */
export function logChangesViewReviewCommentAdded(telemetryService: ITelemetryService, data: { hasExistingFeedback: boolean; hasSuggestion: boolean; isFromPRReview: boolean }): void {
	telemetryService.publicLog2<ChangesViewReviewCommentAddedEvent, ChangesViewReviewCommentAddedClassification>('vscodeAgents.changesView/reviewCommentAdded', data);
}
114+
115+
// --- Tunnel agent host connect ---
116+
117+
/** Error category that callers of `logTunnelConnectAttempt` may attach to an attempt (optional `errorCategory` field). */
export type TunnelConnectErrorCategory = 'relayConnectionFailed' | 'auth' | 'network' | 'other';
118+
/** Failure reason that callers of `logTunnelConnectResolved` may attach to a resolved connect session (optional `failureReason` field). */
export type TunnelConnectFailureReason = 'hostOffline' | 'maxAttemptsReached';
119+
120+
/**
 * Payload shape of the `vscodeAgents.tunnelConnect/attempt` telemetry event.
 *
 * `errorCategory` is a plain string because `logTunnelConnectAttempt` sends
 * an empty string when no category is supplied.
 */
type TunnelConnectAttemptEvent = {
	isReconnect: boolean;
	attempt: number;
	durationMs: number;
	success: boolean;
	errorCategory: string;
};
127+
128+
/**
 * Telemetry classification metadata for the `vscodeAgents.tunnelConnect/attempt` event.
 * Carries one entry per field of `TunnelConnectAttemptEvent`, plus the event owner and comment.
 */
type TunnelConnectAttemptClassification = {
	owner: 'osortega';
	comment: 'Tracks individual agent-host tunnel connect attempts for performance and reliability.';
	isReconnect: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'Whether this attempt was part of a reconnect cycle (true) or an initial connect (false).' };
	attempt: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Attempt number within the current connect session (1-based).' };
	durationMs: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Duration of this individual attempt in milliseconds.' };
	success: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Whether this individual attempt succeeded.' };
	errorCategory: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'Category of error when the attempt failed (relayConnectionFailed, auth, network, other); empty on success.' };
};
137+
138+
/**
 * Publishes the `vscodeAgents.tunnelConnect/attempt` telemetry event for a
 * single tunnel connect attempt.
 *
 * @param telemetryService Service the event is published through.
 * @param data Attempt details. `errorCategory` is optional; when it is
 * omitted the event is sent with an empty string in its place.
 */
export function logTunnelConnectAttempt(telemetryService: ITelemetryService, data: { isReconnect: boolean; attempt: number; durationMs: number; success: boolean; errorCategory?: TunnelConnectErrorCategory }): void {
	telemetryService.publicLog2<TunnelConnectAttemptEvent, TunnelConnectAttemptClassification>('vscodeAgents.tunnelConnect/attempt', {
		isReconnect: data.isReconnect,
		attempt: data.attempt,
		durationMs: data.durationMs,
		success: data.success,
		// The event field is a plain string, so a missing category is sent as ''.
		errorCategory: data.errorCategory ?? '',
	});
}
147+
148+
/**
 * Payload shape of the `vscodeAgents.tunnelConnect/resolved` telemetry event.
 *
 * `failureReason` is a plain string because `logTunnelConnectResolved` sends
 * an empty string when no reason is supplied.
 */
type TunnelConnectResolvedEvent = {
	isReconnect: boolean;
	totalAttempts: number;
	totalDurationMs: number;
	success: boolean;
	failureReason: string;
};
155+
156+
/**
 * Telemetry classification metadata for the `vscodeAgents.tunnelConnect/resolved` event.
 * Carries one entry per field of `TunnelConnectResolvedEvent`, plus the event owner and comment.
 */
type TunnelConnectResolvedClassification = {
	owner: 'osortega';
	comment: 'Tracks overall agent-host tunnel connect session outcomes for reliability.';
	isReconnect: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'Whether the resolved session was a reconnect cycle (true) or an initial connect (false).' };
	totalAttempts: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Total number of attempts made before resolution.' };
	totalDurationMs: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Total elapsed time from session start to resolution in milliseconds.' };
	success: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Whether the connect session ultimately succeeded.' };
	failureReason: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'Reason the session terminated without connecting (hostOffline, maxAttemptsReached); empty on success.' };
};
165+
166+
/**
 * Publishes the `vscodeAgents.tunnelConnect/resolved` telemetry event
 * describing the overall outcome of a tunnel connect session.
 *
 * @param telemetryService Service the event is published through.
 * @param data Session outcome. `failureReason` is optional; when it is
 * omitted the event is sent with an empty string in its place.
 */
export function logTunnelConnectResolved(telemetryService: ITelemetryService, data: { isReconnect: boolean; totalAttempts: number; totalDurationMs: number; success: boolean; failureReason?: TunnelConnectFailureReason }): void {
	telemetryService.publicLog2<TunnelConnectResolvedEvent, TunnelConnectResolvedClassification>('vscodeAgents.tunnelConnect/resolved', {
		isReconnect: data.isReconnect,
		totalAttempts: data.totalAttempts,
		totalDurationMs: data.totalDurationMs,
		success: data.success,
		// The event field is a plain string, so a missing reason is sent as ''.
		failureReason: data.failureReason ?? '',
	});
}

0 commit comments

Comments
 (0)