Skip to content

Commit f1d7927

Browse files
committed
feat: add PET telemetry for refresh, configure, resolve, and process restart
Add four new telemetry events inside nativePythonFinder.ts to provide visibility into PET process behavior:
- PET.REFRESH: tracks each refresh attempt with envCount, unresolvedCount, workspaceDirCount, searchPathCount, and attempt number
- PET.CONFIGURE: tracks the configure RPC with workspace/env dir counts and retry state (including 'skipped' when config is unchanged)
- PET.PROCESS_RESTART: tracks crash recovery with attempt number and result
- PET.RESOLVE: tracks single-env resolution for fast-path and standalone calls

All events include duration measurements and use classifyError() for consistent error categorization. Addresses the telemetry gap between PET.INIT_DURATION (spawn) and ENVIRONMENT_DISCOVERY (final result).
1 parent a82aa97 commit f1d7927

2 files changed

Lines changed: 174 additions & 0 deletions

File tree

src/common/telemetry/constants.ts

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,42 @@ export enum EventNames {
113113
* - errorType: string (classified error category, on failure only)
114114
*/
115115
MANAGER_LAZY_INIT = 'MANAGER.LAZY_INIT',
116+
/**
117+
* Telemetry event for a PET refresh attempt (the core discovery RPC call).
118+
* Properties:
119+
* - result: 'success' | 'timeout' | 'error'
120+
* - envCount: number (environments returned via notifications)
121+
* - unresolvedCount: number (envs that needed follow-up resolve calls)
122+
* - workspaceDirCount: number (workspace directories sent in configure)
123+
* - searchPathCount: number (environmentDirectories count from the last applied configuration; reported on success only)
124+
* - attempt: number (0 = first try, 1 = retry)
125+
* - errorType: string (classified error category, on failure only)
126+
*/
127+
PET_REFRESH = 'PET.REFRESH',
128+
/**
129+
* Telemetry event for a PET configure RPC call.
130+
* Properties:
131+
* - result: 'success' | 'timeout' | 'error' | 'skipped'
132+
* - workspaceDirCount: number
133+
* - envDirCount: number (environmentDirectories count)
134+
* - retryCount: number (consecutive timeout count from ConfigureRetryState; always 0 when result is 'skipped')
135+
*/
136+
PET_CONFIGURE = 'PET.CONFIGURE',
137+
/**
138+
* Telemetry event for PET process restart attempts.
139+
* Properties:
140+
* - attempt: number (1-based restart attempt number)
141+
* - result: 'success' | 'error'
142+
* - errorType: string (classified error category, on failure only)
143+
*/
144+
PET_PROCESS_RESTART = 'PET.PROCESS_RESTART',
145+
/**
146+
* Telemetry event for PET resolve calls (single-env resolution).
147+
* Properties:
148+
* - result: 'success' | 'timeout' | 'error'
149+
* - errorType: string (classified error category, on failure only)
150+
*/
151+
PET_RESOLVE = 'PET.RESOLVE',
116152
}
117153

118154
// Map all events to their properties
@@ -403,4 +439,68 @@ export interface IEventNamePropertyMapping {
403439
toolSource: string;
404440
errorType?: string;
405441
};
442+
443+
/* __GDPR__
444+
"pet.refresh": {
445+
"result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
446+
"envCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
447+
"unresolvedCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
448+
"workspaceDirCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
449+
"searchPathCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
450+
"attempt": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
451+
"errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
452+
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
453+
}
454+
*/
455+
[EventNames.PET_REFRESH]: {
456+
result: 'success' | 'timeout' | 'error';
457+
envCount?: number;
458+
unresolvedCount?: number;
459+
workspaceDirCount?: number;
460+
searchPathCount?: number;
461+
attempt: number;
462+
errorType?: string;
463+
};
464+
465+
/* __GDPR__
466+
"pet.configure": {
467+
"result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
468+
"workspaceDirCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
469+
"envDirCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
470+
"retryCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
471+
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
472+
}
473+
*/
474+
[EventNames.PET_CONFIGURE]: {
475+
result: 'success' | 'timeout' | 'error' | 'skipped';
476+
workspaceDirCount?: number;
477+
envDirCount?: number;
478+
retryCount: number;
479+
};
480+
481+
/* __GDPR__
482+
"pet.process_restart": {
483+
"attempt": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
484+
"result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
485+
"errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
486+
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
487+
}
488+
*/
489+
[EventNames.PET_PROCESS_RESTART]: {
490+
attempt: number;
491+
result: 'success' | 'error';
492+
errorType?: string;
493+
};
494+
495+
/* __GDPR__
496+
"pet.resolve": {
497+
"result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
498+
"errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
499+
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
500+
}
501+
*/
502+
[EventNames.PET_RESOLVE]: {
503+
result: 'success' | 'timeout' | 'error';
504+
errorType?: string;
505+
};
406506
}

src/managers/common/nativePythonFinder.ts

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ import { spawnProcess } from '../../common/childProcess.apis';
99
import { ENVS_EXTENSION_ID, PYTHON_EXTENSION_ID } from '../../common/constants';
1010
import { getExtension } from '../../common/extension.apis';
1111
import { traceError, traceVerbose, traceWarn } from '../../common/logging';
12+
import { StopWatch } from '../../common/stopWatch';
13+
import { EventNames } from '../../common/telemetry/constants';
14+
import { classifyError } from '../../common/telemetry/errorClassifier';
15+
import { sendTelemetryEvent } from '../../common/telemetry/sender';
1216
import { untildify, untildifyArray } from '../../common/utils/pathUtils';
1317
import { isWindows } from '../../common/utils/platformUtils';
1418
import { createRunningWorkerPool, WorkerPool } from '../../common/utils/workerPool';
@@ -246,6 +250,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
246250

247251
public async resolve(executable: string): Promise<NativeEnvInfo> {
248252
await this.ensureProcessRunning();
253+
const sw = new StopWatch();
249254
try {
250255
await this.configure();
251256
const environment = await sendRequestWithTimeout<NativeEnvInfo>(
@@ -258,8 +263,19 @@ class NativePythonFinderImpl implements NativePythonFinder {
258263
this.outputChannel.info(`Resolved Python Environment ${environment.executable}`);
259264
// Reset restart attempts on successful request
260265
this.restartAttempts = 0;
266+
sendTelemetryEvent(EventNames.PET_RESOLVE, sw.elapsedTime, { result: 'success' });
261267
return environment;
262268
} catch (ex) {
269+
const errorType = classifyError(ex);
270+
sendTelemetryEvent(
271+
EventNames.PET_RESOLVE,
272+
sw.elapsedTime,
273+
{
274+
result: errorType === 'spawn_timeout' ? 'timeout' : 'error',
275+
errorType,
276+
},
277+
ex instanceof Error ? ex : undefined,
278+
);
263279
// On resolve timeout or connection error (not configure — configure handles its own timeout),
264280
// kill the hung process so next request triggers restart
265281
if ((ex instanceof RpcTimeoutError && ex.method !== 'configure') || ex instanceof rpc.ConnectionError) {
@@ -308,13 +324,15 @@ class NativePythonFinderImpl implements NativePythonFinder {
308324
private async restart(): Promise<void> {
309325
this.isRestarting = true;
310326
this.restartAttempts++;
327+
const attempt = this.restartAttempts;
311328

312329
const backoffMs = RESTART_BACKOFF_BASE_MS * Math.pow(2, this.restartAttempts - 1);
313330
this.outputChannel.warn(
314331
`[pet] Restarting Python Environment Tools (attempt ${this.restartAttempts}/${MAX_RESTART_ATTEMPTS}, ` +
315332
`waiting ${backoffMs}ms)`,
316333
);
317334

335+
const sw = new StopWatch();
318336
try {
319337
// Kill existing process if still running
320338
this.killProcess();
@@ -336,10 +354,17 @@ class NativePythonFinderImpl implements NativePythonFinder {
336354
this.connection = this.start();
337355

338356
this.outputChannel.info('[pet] Python Environment Tools restarted successfully');
357+
sendTelemetryEvent(EventNames.PET_PROCESS_RESTART, sw.elapsedTime, { attempt, result: 'success' });
339358

340359
// Do not reset restart attempts here: a successful start only means the process didn't immediately fail.
341360
// We'll reset this only after a successful request completes
342361
} catch (ex) {
362+
sendTelemetryEvent(
363+
EventNames.PET_PROCESS_RESTART,
364+
sw.elapsedTime,
365+
{ attempt, result: 'error', errorType: classifyError(ex) },
366+
ex instanceof Error ? ex : undefined,
367+
);
343368
this.outputChannel.error('[pet] Failed to restart Python Environment Tools:', ex);
344369
this.outputChannel.error(
345370
'[pet] To debug, run "Python Environments: Run Python Environment Tool (PET) in Terminal" from the Command Palette.',
@@ -609,13 +634,18 @@ class NativePythonFinderImpl implements NativePythonFinder {
609634
const disposables: Disposable[] = [];
610635
const unresolved: Promise<void>[] = [];
611636
const nativeInfo: NativeInfo[] = [];
637+
const sw = new StopWatch();
638+
let unresolvedCount = 0;
612639
try {
613640
await this.configure();
614641
const refreshOptions = this.getRefreshOptions(options);
642+
const workspaceDirCount = this.lastConfiguration?.workspaceDirectories.length ?? 0;
643+
const searchPathCount = this.lastConfiguration?.environmentDirectories.length ?? 0;
615644
disposables.push(
616645
this.connection.onNotification('environment', (data: NativeEnvInfo) => {
617646
this.outputChannel.info(`Discovered env: ${data.executable || data.prefix}`);
618647
if (data.executable && (!data.version || !data.prefix)) {
648+
unresolvedCount++;
619649
unresolved.push(
620650
sendRequestWithTimeout<NativeEnvInfo>(
621651
this.connection,
@@ -655,7 +685,29 @@ class NativePythonFinderImpl implements NativePythonFinder {
655685
if (attempt > 0) {
656686
this.outputChannel.info(`[pet] Refresh succeeded on retry attempt ${attempt + 1}`);
657687
}
688+
689+
sendTelemetryEvent(EventNames.PET_REFRESH, sw.elapsedTime, {
690+
result: 'success',
691+
envCount: nativeInfo.filter((e) => isNativeEnvInfo(e)).length,
692+
unresolvedCount,
693+
workspaceDirCount,
694+
searchPathCount,
695+
attempt,
696+
});
658697
} catch (ex) {
698+
const errorType = classifyError(ex);
699+
sendTelemetryEvent(
700+
EventNames.PET_REFRESH,
701+
sw.elapsedTime,
702+
{
703+
result: errorType === 'spawn_timeout' ? 'timeout' : 'error',
704+
envCount: nativeInfo.filter((e) => isNativeEnvInfo(e)).length,
705+
unresolvedCount,
706+
attempt,
707+
errorType,
708+
},
709+
ex instanceof Error ? ex : undefined,
710+
);
659711
// On refresh timeout or connection error (not configure — configure handles its own timeout),
660712
// kill the hung process so next request triggers restart
661713
if ((ex instanceof RpcTimeoutError && ex.method !== 'configure') || ex instanceof rpc.ConnectionError) {
@@ -694,6 +746,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
694746
// No need to send a configuration request if there are no changes.
695747
if (this.lastConfiguration && this.configurationEquals(options, this.lastConfiguration)) {
696748
this.outputChannel.debug('[pet] configure: No changes detected, skipping configuration update.');
749+
sendTelemetryEvent(EventNames.PET_CONFIGURE, 0, { result: 'skipped', retryCount: 0 });
697750
return;
698751
}
699752
this.outputChannel.info('[pet] configure: Sending configuration update:', JSON.stringify(options));
@@ -704,12 +757,33 @@ class NativePythonFinderImpl implements NativePythonFinder {
704757
`[pet] configure: Using extended timeout of ${timeoutMs}ms (retry ${this.configureRetry.timeoutCount})`,
705758
);
706759
}
760+
const sw = new StopWatch();
761+
const retryCount = this.configureRetry.timeoutCount;
762+
const workspaceDirCount = options.workspaceDirectories.length;
763+
const envDirCount = options.environmentDirectories.length;
707764
try {
708765
await sendRequestWithTimeout(this.connection, 'configure', options, timeoutMs);
709766
// Only cache after success so failed/timed-out calls will retry
710767
this.lastConfiguration = options;
711768
this.configureRetry.onSuccess();
769+
sendTelemetryEvent(EventNames.PET_CONFIGURE, sw.elapsedTime, {
770+
result: 'success',
771+
workspaceDirCount,
772+
envDirCount,
773+
retryCount,
774+
});
712775
} catch (ex) {
776+
sendTelemetryEvent(
777+
EventNames.PET_CONFIGURE,
778+
sw.elapsedTime,
779+
{
780+
result: ex instanceof RpcTimeoutError ? 'timeout' : 'error',
781+
workspaceDirCount,
782+
envDirCount,
783+
retryCount,
784+
},
785+
ex instanceof Error ? ex : undefined,
786+
);
713787
// Clear cached config so the next call retries instead of short-circuiting via configurationEquals
714788
this.lastConfiguration = undefined;
715789
if (ex instanceof RpcTimeoutError) {

0 commit comments

Comments
 (0)