Skip to content

Commit da84bc2

Browse files
committed
feat: add PET telemetry for refresh, configure, resolve, and process restart
Add four new telemetry events inside nativePythonFinder.ts to provide visibility into PET process behavior: - PET.REFRESH: tracks each refresh attempt with envCount, unresolvedCount, workspaceDirCount, searchPathCount, and attempt number - PET.CONFIGURE: tracks the configure RPC with workspace/env dir counts and retry state (including 'skipped' when config is unchanged) - PET.PROCESS_RESTART: tracks crash recovery with attempt number and result - PET.RESOLVE: tracks single-env resolution for fast-path and standalone calls All events include duration measurements and use classifyError() for consistent error categorization. Addresses the telemetry gap between PET.INIT_DURATION (spawn) and ENVIRONMENT_DISCOVERY (final result).
1 parent 1db132b commit da84bc2

2 files changed

Lines changed: 171 additions & 0 deletions

File tree

src/common/telemetry/constants.ts

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,42 @@ export enum EventNames {
122122
* - duration: number (milliseconds taken for the CLI operation)
123123
*/
124124
PET_JSON_CLI_FALLBACK = 'PET.JSON_CLI_FALLBACK',
125+
/**
126+
* Telemetry event for a PET refresh attempt (the core discovery RPC call).
127+
* Properties:
128+
* - result: 'success' | 'timeout' | 'error'
129+
* - envCount: number (environments returned via notifications)
130+
* - unresolvedCount: number (envs that needed follow-up resolve calls)
131+
* - workspaceDirCount: number (workspace directories sent in configure)
132+
* - searchPathCount: number (extra search paths sent in configure)
133+
* - attempt: number (0 = first try, 1 = retry)
134+
* - errorType: string (classified error category, on failure only)
135+
*/
136+
PET_REFRESH = 'PET.REFRESH',
137+
/**
138+
* Telemetry event for a PET configure RPC call.
139+
* Properties:
140+
* - result: 'success' | 'timeout' | 'error' | 'skipped'
141+
* - workspaceDirCount: number
142+
* - envDirCount: number (environmentDirectories count)
143+
* - retryCount: number (consecutive timeout count from ConfigureRetryState)
144+
*/
145+
PET_CONFIGURE = 'PET.CONFIGURE',
146+
/**
147+
* Telemetry event for PET process restart attempts.
148+
* Properties:
149+
* - attempt: number (1-based restart attempt number)
150+
* - result: 'success' | 'error'
151+
* - errorType: string (classified error category, on failure only)
152+
*/
153+
PET_PROCESS_RESTART = 'PET.PROCESS_RESTART',
154+
/**
155+
* Telemetry event for PET resolve calls (single-env resolution).
156+
* Properties:
157+
* - result: 'success' | 'timeout' | 'error'
158+
* - errorType: string (classified error category, on failure only)
159+
*/
160+
PET_RESOLVE = 'PET.RESOLVE',
125161
}
126162

127163
// Map all events to their properties
@@ -424,4 +460,68 @@ export interface IEventNamePropertyMapping {
424460
operation: 'refresh' | 'resolve';
425461
result: 'success' | 'error';
426462
};
463+
464+
/* __GDPR__
465+
"pet.refresh": {
466+
"result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
467+
"envCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
468+
"unresolvedCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
469+
"workspaceDirCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
470+
"searchPathCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
471+
"attempt": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
472+
"errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
473+
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
474+
}
475+
*/
476+
[EventNames.PET_REFRESH]: {
477+
result: 'success' | 'timeout' | 'error';
478+
envCount?: number;
479+
unresolvedCount?: number;
480+
workspaceDirCount?: number;
481+
searchPathCount?: number;
482+
attempt: number;
483+
errorType?: string;
484+
};
485+
486+
/* __GDPR__
487+
"pet.configure": {
488+
"result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
489+
"workspaceDirCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
490+
"envDirCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
491+
"retryCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
492+
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
493+
}
494+
*/
495+
[EventNames.PET_CONFIGURE]: {
496+
result: 'success' | 'timeout' | 'error' | 'skipped';
497+
workspaceDirCount?: number;
498+
envDirCount?: number;
499+
retryCount: number;
500+
};
501+
502+
/* __GDPR__
503+
"pet.process_restart": {
504+
"attempt": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
505+
"result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
506+
"errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
507+
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
508+
}
509+
*/
510+
[EventNames.PET_PROCESS_RESTART]: {
511+
attempt: number;
512+
result: 'success' | 'error';
513+
errorType?: string;
514+
};
515+
516+
/* __GDPR__
517+
"pet.resolve": {
518+
"result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
519+
"errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
520+
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
521+
}
522+
*/
523+
[EventNames.PET_RESOLVE]: {
524+
result: 'success' | 'timeout' | 'error';
525+
errorType?: string;
526+
};
427527
}

src/managers/common/nativePythonFinder.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { getExtension } from '../../common/extension.apis';
1111
import { traceError, traceVerbose, traceWarn } from '../../common/logging';
1212
import { StopWatch } from '../../common/stopWatch';
1313
import { EventNames } from '../../common/telemetry/constants';
14+
import { classifyError } from '../../common/telemetry/errorClassifier';
1415
import { sendTelemetryEvent } from '../../common/telemetry/sender';
1516
import { untildify, untildifyArray } from '../../common/utils/pathUtils';
1617
import { isWindows } from '../../common/utils/platformUtils';
@@ -253,6 +254,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
253254
}
254255

255256
public async resolve(executable: string): Promise<NativeEnvInfo> {
257+
const sw = new StopWatch();
256258
try {
257259
await this.ensureProcessRunning();
258260
try {
@@ -267,6 +269,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
267269
this.outputChannel.info(`Resolved Python Environment ${environment.executable}`);
268270
// Reset restart attempts on successful request
269271
this.restartAttempts = 0;
272+
sendTelemetryEvent(EventNames.PET_RESOLVE, sw.elapsedTime, { result: 'success' });
270273
return environment;
271274
} catch (ex) {
272275
// On resolve timeout or connection error (not configure — configure handles its own timeout),
@@ -280,6 +283,16 @@ class NativePythonFinderImpl implements NativePythonFinder {
280283
throw ex;
281284
}
282285
} catch (ex) {
286+
const errorType = classifyError(ex);
287+
sendTelemetryEvent(
288+
EventNames.PET_RESOLVE,
289+
sw.elapsedTime,
290+
{
291+
result: errorType === 'spawn_timeout' ? 'timeout' : 'error',
292+
errorType,
293+
},
294+
ex instanceof Error ? ex : undefined,
295+
);
283296
// If the server mode is fully exhausted, fall back to the CLI JSON mode
284297
if (this.isServerExhausted()) {
285298
this.outputChannel.warn('[pet] Server mode exhausted, falling back to JSON CLI for resolve');
@@ -325,13 +338,15 @@ class NativePythonFinderImpl implements NativePythonFinder {
325338
private async restart(): Promise<void> {
326339
this.isRestarting = true;
327340
this.restartAttempts++;
341+
const attempt = this.restartAttempts;
328342

329343
const backoffMs = RESTART_BACKOFF_BASE_MS * Math.pow(2, this.restartAttempts - 1);
330344
this.outputChannel.warn(
331345
`[pet] Restarting Python Environment Tools (attempt ${this.restartAttempts}/${MAX_RESTART_ATTEMPTS}, ` +
332346
`waiting ${backoffMs}ms)`,
333347
);
334348

349+
const sw = new StopWatch();
335350
try {
336351
// Kill existing process if still running
337352
this.killProcess();
@@ -353,10 +368,17 @@ class NativePythonFinderImpl implements NativePythonFinder {
353368
this.connection = this.start();
354369

355370
this.outputChannel.info('[pet] Python Environment Tools restarted successfully');
371+
sendTelemetryEvent(EventNames.PET_PROCESS_RESTART, sw.elapsedTime, { attempt, result: 'success' });
356372

357373
// Reset restart attempts on successful start (process didn't immediately fail)
358374
// We'll reset this only after a successful request completes
359375
} catch (ex) {
376+
sendTelemetryEvent(
377+
EventNames.PET_PROCESS_RESTART,
378+
sw.elapsedTime,
379+
{ attempt, result: 'error', errorType: classifyError(ex) },
380+
ex instanceof Error ? ex : undefined,
381+
);
360382
this.outputChannel.error('[pet] Failed to restart Python Environment Tools:', ex);
361383
this.outputChannel.error(
362384
'[pet] To debug, run "Python Environments: Run Python Environment Tool (PET) in Terminal" from the Command Palette.',
@@ -634,13 +656,18 @@ class NativePythonFinderImpl implements NativePythonFinder {
634656
const disposables: Disposable[] = [];
635657
const unresolved: Promise<void>[] = [];
636658
const nativeInfo: NativeInfo[] = [];
659+
const sw = new StopWatch();
660+
let unresolvedCount = 0;
637661
try {
638662
await this.configure();
639663
const refreshOptions = this.getRefreshOptions(options);
664+
const workspaceDirCount = this.lastConfiguration?.workspaceDirectories.length ?? 0;
665+
const searchPathCount = this.lastConfiguration?.environmentDirectories.length ?? 0;
640666
disposables.push(
641667
this.connection.onNotification('environment', (data: NativeEnvInfo) => {
642668
this.outputChannel.info(`Discovered env: ${data.executable || data.prefix}`);
643669
if (data.executable && (!data.version || !data.prefix)) {
670+
unresolvedCount++;
644671
unresolved.push(
645672
sendRequestWithTimeout<NativeEnvInfo>(
646673
this.connection,
@@ -680,7 +707,29 @@ class NativePythonFinderImpl implements NativePythonFinder {
680707
if (attempt > 0) {
681708
this.outputChannel.info(`[pet] Refresh succeeded on retry attempt ${attempt + 1}`);
682709
}
710+
711+
sendTelemetryEvent(EventNames.PET_REFRESH, sw.elapsedTime, {
712+
result: 'success',
713+
envCount: nativeInfo.filter((e) => isNativeEnvInfo(e)).length,
714+
unresolvedCount,
715+
workspaceDirCount,
716+
searchPathCount,
717+
attempt,
718+
});
683719
} catch (ex) {
720+
const errorType = classifyError(ex);
721+
sendTelemetryEvent(
722+
EventNames.PET_REFRESH,
723+
sw.elapsedTime,
724+
{
725+
result: errorType === 'spawn_timeout' ? 'timeout' : 'error',
726+
envCount: nativeInfo.filter((e) => isNativeEnvInfo(e)).length,
727+
unresolvedCount,
728+
attempt,
729+
errorType,
730+
},
731+
ex instanceof Error ? ex : undefined,
732+
);
684733
// On refresh timeout or connection error (not configure — configure handles its own timeout),
685734
// kill the hung process so next request triggers restart
686735
if ((ex instanceof RpcTimeoutError && ex.method !== 'configure') || ex instanceof rpc.ConnectionError) {
@@ -709,6 +758,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
709758
// No need to send a configuration request if there are no changes.
710759
if (this.lastConfiguration && this.configurationEquals(options, this.lastConfiguration)) {
711760
this.outputChannel.debug('[pet] configure: No changes detected, skipping configuration update.');
761+
sendTelemetryEvent(EventNames.PET_CONFIGURE, 0, { result: 'skipped', retryCount: 0 });
712762
return;
713763
}
714764
this.outputChannel.info('[pet] configure: Sending configuration update:', JSON.stringify(options));
@@ -719,12 +769,33 @@ class NativePythonFinderImpl implements NativePythonFinder {
719769
`[pet] configure: Using extended timeout of ${timeoutMs}ms (retry ${this.configureRetry.timeoutCount})`,
720770
);
721771
}
772+
const sw = new StopWatch();
773+
const retryCount = this.configureRetry.timeoutCount;
774+
const workspaceDirCount = options.workspaceDirectories.length;
775+
const envDirCount = options.environmentDirectories.length;
722776
try {
723777
await sendRequestWithTimeout(this.connection, 'configure', options, timeoutMs);
724778
// Only cache after success so failed/timed-out calls will retry
725779
this.lastConfiguration = options;
726780
this.configureRetry.onSuccess();
781+
sendTelemetryEvent(EventNames.PET_CONFIGURE, sw.elapsedTime, {
782+
result: 'success',
783+
workspaceDirCount,
784+
envDirCount,
785+
retryCount,
786+
});
727787
} catch (ex) {
788+
sendTelemetryEvent(
789+
EventNames.PET_CONFIGURE,
790+
sw.elapsedTime,
791+
{
792+
result: ex instanceof RpcTimeoutError ? 'timeout' : 'error',
793+
workspaceDirCount,
794+
envDirCount,
795+
retryCount,
796+
},
797+
ex instanceof Error ? ex : undefined,
798+
);
728799
// Clear cached config so the next call retries instead of short-circuiting via configurationEquals
729800
this.lastConfiguration = undefined;
730801
if (ex instanceof RpcTimeoutError) {

0 commit comments

Comments
 (0)