Skip to content

Commit 543cc19

Browse files
committed
fixes
1 parent 29b4cbe commit 543cc19

File tree

3 files changed

+102
-134
lines changed

3 files changed

+102
-134
lines changed

.github/workflows/chat-perf.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ jobs:
257257
uses: actions/upload-artifact@v7
258258
with:
259259
name: perf-results-${{ matrix.group }}
260+
include-hidden-files: true
260261
path: |
261262
perf-output.log
262263
.chat-simulation-data/
@@ -340,6 +341,7 @@ jobs:
340341
uses: actions/upload-artifact@v7
341342
with:
342343
name: leak-results
344+
include-hidden-files: true
343345
path: |
344346
leak-output.log
345347
.chat-simulation-data/chat-simulation-leak-results.json

scripts/chat-simulation/common/utils.js

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,17 @@ function buildEnv(mockServer, { isDevBuild = true } = {}) {
193193
* @param {string} logsDir
194194
* @returns {string[]}
195195
*/
196-
function buildArgs(userDataDir, extDir, logsDir, { isDevBuild = true, extHostInspectPort = 0 } = {}) {
196+
function buildArgs(userDataDir, extDir, logsDir, { isDevBuild = true, extHostInspectPort = 0, traceFile = '' } = {}) {
197+
// Chromium switches must come BEFORE the app path (ROOT) — Chromium
198+
// only processes switches that precede the first non-switch argument.
199+
const chromiumFlags = [];
200+
if (traceFile) {
201+
chromiumFlags.push(`--enable-tracing=v8.gc,devtools.timeline,blink.user_timing`);
202+
chromiumFlags.push(`--trace-startup-file=${traceFile}`);
203+
chromiumFlags.push(`--enable-tracing-format=json`);
204+
}
197205
const args = [
206+
...chromiumFlags,
198207
ROOT,
199208
'--skip-release-notes',
200209
'--skip-welcome',
@@ -505,18 +514,19 @@ async function launchVSCode(executable, launchArgs, env, opts = {}) {
505514
page,
506515
browser,
507516
close: async () => {
508-
await browser.close().catch(() => { });
509-
const pid = child.pid;
510-
if (pid) {
511-
if (process.platform === 'win32') {
512-
try { execSync(`taskkill /F /T /PID ${pid}`, { stdio: 'ignore' }); }
513-
catch { }
514-
} else {
515-
try { execSync(`pkill -TERM -P ${pid}`, { stdio: 'ignore' }); }
516-
catch { }
517-
child.kill('SIGTERM');
518-
}
517+
// Trigger app.quit() so Chromium flushes trace buffers and
518+
// writes --trace-startup-file. Using Cmd+Q / Alt+F4 triggers
519+
// the full Electron quit lifecycle including trace flush.
520+
// window.close() only closes the BrowserWindow without
521+
// triggering app-level quit.
522+
try {
523+
const quitKey = process.platform === 'darwin' ? 'Meta+KeyQ' : 'Alt+F4';
524+
await page.keyboard.press(quitKey);
525+
} catch {
526+
// Page may already be closed
519527
}
528+
const pid = child.pid;
529+
// Wait for graceful exit (up to 30s for trace flush)
520530
await new Promise(resolve => {
521531
const timer = setTimeout(() => {
522532
if (pid) {
@@ -525,9 +535,11 @@ async function launchVSCode(executable, launchArgs, env, opts = {}) {
525535
}
526536
child.kill('SIGKILL');
527537
resolve(undefined);
528-
}, 3000);
538+
}, 30_000);
529539
child.once('exit', () => { clearTimeout(timer); resolve(undefined); });
530540
});
541+
// Disconnect CDP after the process has exited
542+
await browser.close().catch(() => { });
531543
// Kill crashpad handler — it self-daemonizes and outlives the
532544
// parent. Wait briefly for it to detach, then kill by pattern.
533545
await new Promise(r => setTimeout(r, 500));

scripts/chat-simulation/test-chat-perf-regression.js

Lines changed: 75 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -172,10 +172,11 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
172172
const runDiagDir = path.join(runDir, `${role}-${buildLabel}`, runIndex.replace(/^baseline-/, ''));
173173
fs.mkdirSync(runDiagDir, { recursive: true });
174174

175+
const tracePath = path.join(runDiagDir, 'trace.json');
175176
const extHostInspectPort = getNextExtHostInspectPort();
176177
const vscode = await launchVSCode(
177178
electronPath,
178-
buildArgs(userDataDir, extDir, logsDir, { isDevBuild, extHostInspectPort }),
179+
buildArgs(userDataDir, extDir, logsDir, { isDevBuild, extHostInspectPort, traceFile: tracePath }),
179180
buildEnv(mockServer, { isDevBuild }),
180181
{ verbose },
181182
);
@@ -187,6 +188,12 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
187188
let extHostInspector = null;
188189
/** @type {{ usedSize: number, totalSize: number } | null} */
189190
let extHostHeapBefore = null;
191+
/** @type {Omit<RunMetrics, 'majorGCs' | 'minorGCs' | 'gcDurationMs' | 'longTaskCount' | 'timeToUIUpdated' | 'timeToFirstToken' | 'timeToComplete' | 'instructionCollectionTime' | 'agentInvokeTime' | 'hasInternalMarks' | 'internalFirstToken'> | null} */
192+
let partialMetrics = null;
193+
// Timing vars hoisted for access in post-close trace parsing
194+
let submitTime = 0;
195+
let firstResponseTime = 0;
196+
let responseCompleteTime = 0;
190197

191198
try {
192199
await window.waitForSelector('.monaco-workbench', { timeout: 60_000 });
@@ -195,14 +202,6 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
195202
await cdp.send('Performance.enable');
196203
const heapBefore = /** @type {any} */ (await cdp.send('Runtime.getHeapUsage'));
197204

198-
// Stop any existing tracing session (stable builds may have one active)
199-
try { await cdp.send('Tracing.end'); await new Promise(r => setTimeout(r, 200)); } catch { }
200-
await cdp.send('Tracing.start', {
201-
traceConfig: {
202-
includedCategories: ['v8.gc', 'devtools.timeline'],
203-
recordMode: 'recordContinuously',
204-
}
205-
});
206205
const metricsBefore = await cdp.send('Performance.getMetrics');
207206

208207
// Open chat
@@ -319,53 +318,14 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
319318
}
320319
});
321320

322-
// Use a PerformanceObserver to capture code/chat/* marks as they're
323-
// emitted. This is event-driven (no polling) and captures marks
324-
// even if they're cleared immediately after emission.
325-
await window.evaluate(() => {
326-
// @ts-ignore
327-
globalThis._chatPerfCapture = [];
328-
try {
329-
// @ts-ignore
330-
globalThis._chatPerfObserver = new PerformanceObserver((list) => {
331-
for (const entry of list.getEntries()) {
332-
if (entry.name.startsWith('code/chat/')) {
333-
const timeOrigin = performance.timeOrigin ?? 0;
334-
// @ts-ignore
335-
globalThis._chatPerfCapture.push({
336-
name: entry.name,
337-
startTime: Math.round(timeOrigin + entry.startTime),
338-
});
339-
}
340-
}
341-
});
342-
// @ts-ignore
343-
globalThis._chatPerfObserver.observe({ type: 'mark', buffered: false });
344-
} catch {
345-
// PerformanceObserver not available — fall back to polling
346-
// @ts-ignore
347-
globalThis._chatPerfPollId = setInterval(() => {
348-
// @ts-ignore
349-
const marks = globalThis.MonacoPerformanceMarks?.getMarks() ?? [];
350-
for (const m of marks) {
351-
// @ts-ignore
352-
if (m.name.startsWith('code/chat/') && !globalThis._chatPerfCapture.some(c => c.name === m.name)) {
353-
// @ts-ignore
354-
globalThis._chatPerfCapture.push({ name: m.name, startTime: m.startTime });
355-
}
356-
}
357-
}, 16);
358-
}
359-
});
360-
361321
// Submit
362322
const completionsBefore = mockServer.completionCount();
363-
const submitTime = Date.now();
323+
submitTime = Date.now();
364324
await window.keyboard.press('Enter');
365325

366326
// Wait for mock server to serve the response
367327
try { await mockServer.waitForCompletion(completionsBefore + 1, 60_000); } catch { }
368-
const firstResponseTime = Date.now();
328+
firstResponseTime = Date.now();
369329

370330
// Wait for DOM response to settle
371331
await dismissDialog();
@@ -378,7 +338,7 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
378338
},
379339
responseSelector, { timeout: 30_000 },
380340
);
381-
let responseCompleteTime = Date.now();
341+
responseCompleteTime = Date.now();
382342

383343
// -- User turn injection loop -----------------------------------------
384344
// For multi-turn scenarios with user follow-ups, type each follow-up
@@ -478,26 +438,6 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
478438
console.log(` [debug] Client-side timing: firstResponse=${firstResponseTime - submitTime}ms, complete=${responseCompleteTime - submitTime}ms`);
479439
}
480440

481-
// Collect perf marks and tear down the observer/poll
482-
const chatMarks = await window.evaluate(() => {
483-
// @ts-ignore
484-
if (globalThis._chatPerfObserver) { globalThis._chatPerfObserver.disconnect(); }
485-
// @ts-ignore
486-
if (globalThis._chatPerfPollId) { clearInterval(globalThis._chatPerfPollId); }
487-
// @ts-ignore
488-
const marks = globalThis._chatPerfCapture ?? [];
489-
// @ts-ignore
490-
delete globalThis._chatPerfCapture;
491-
// @ts-ignore
492-
delete globalThis._chatPerfObserver;
493-
// @ts-ignore
494-
delete globalThis._chatPerfPollId;
495-
return marks;
496-
});
497-
if (verbose && chatMarks.length > 0) {
498-
console.log(` [debug] chatMarks (${chatMarks.length}): ${chatMarks.map((/** @type {any} */ m) => m.name.split('/').slice(-1)[0]).join(', ')}`);
499-
}
500-
501441
// Collect Long Animation Frame entries and tear down the observer
502442
const loafData = await window.evaluate(() => {
503443
// @ts-ignore
@@ -514,20 +454,8 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
514454
});
515455

516456
const heapAfter = /** @type {any} */ (await cdp.send('Runtime.getHeapUsage'));
517-
/** @type {Array<any>} */
518-
const traceEvents = [];
519-
cdp.on('Tracing.dataCollected', (/** @type {any} */ data) => { traceEvents.push(...data.value); });
520-
const tracingComplete = new Promise(resolve => {
521-
cdp.once('Tracing.tracingComplete', () => resolve(undefined));
522-
});
523-
await cdp.send('Tracing.end');
524-
await tracingComplete;
525457
const metricsAfter = await cdp.send('Performance.getMetrics');
526458

527-
// Save performance trace (Chrome DevTools format)
528-
const tracePath = path.join(runDiagDir, 'trace.json');
529-
fs.writeFileSync(tracePath, JSON.stringify({ traceEvents }));
530-
531459
// Take heap snapshot
532460
const snapshotPath = path.join(runDiagDir, 'heap.heapsnapshot');
533461
await cdp.send('HeapProfiler.enable');
@@ -594,46 +522,15 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
594522
}
595523
}
596524

597-
// Parse timing — prefer internal code/chat/* marks (precise, in-process)
598-
// with client-side Date.now() as fallback for older builds without marks.
599-
const timeToUIUpdated = markDuration(chatMarks, 'request/start', 'request/uiUpdated');
600-
const internalFirstToken = markDuration(chatMarks, 'request/start', 'request/firstToken');
601-
const timeToFirstToken = internalFirstToken >= 0 ? internalFirstToken : (firstResponseTime - submitTime);
602-
const timeToComplete = responseCompleteTime - submitTime;
603-
const instructionCollectionTime = markDuration(chatMarks, 'request/willCollectInstructions', 'request/didCollectInstructions');
604-
const agentInvokeTime = markDuration(chatMarks, 'agent/willInvoke', 'agent/didInvoke');
605-
606-
// Parse GC events from trace.
607-
// Use the trace-event category and phase fields which are stable
608-
// across V8 versions, rather than matching event name substrings.
609-
let majorGCs = 0, minorGCs = 0, gcDurationMs = 0;
610-
for (const event of traceEvents) {
611-
const isGC = event.cat === 'v8.gc'
612-
|| event.cat === 'devtools.timeline,v8'
613-
|| (typeof event.cat === 'string' && event.cat.split(',').some((/** @type {string} */ c) => c.trim() === 'v8.gc'));
614-
if (!isGC) { continue; }
615-
// Only count complete ('X') or duration-begin ('B') events to
616-
// avoid double-counting begin/end pairs.
617-
if (event.ph && event.ph !== 'X' && event.ph !== 'B') { continue; }
618-
const name = event.name || '';
619-
if (/Major|MarkCompact|MSC|MC|IncrementalMarking|FinalizeMC/i.test(name)) { majorGCs++; }
620-
else if (/Minor|Scaveng/i.test(name)) { minorGCs++; }
621-
else { minorGCs++; } // default unknown GC events to minor
622-
if (event.dur) { gcDurationMs += event.dur / 1000; }
623-
}
624-
let longTaskCount = 0;
625-
for (const event of traceEvents) {
626-
if (event.name === 'RunTask' && event.dur && event.dur > 50_000) { longTaskCount++; }
627-
}
525+
// Store partial metrics here so we can combine with trace data after close.
628526

629527
/** @param {any} r @param {string} name */
630528
function getMetric(r, name) {
631529
const e = r.metrics?.find((/** @type {any} */ m) => m.name === name);
632530
return e ? e.value : 0;
633531
}
634532

635-
return {
636-
timeToUIUpdated, timeToFirstToken, timeToComplete, instructionCollectionTime, agentInvokeTime,
533+
partialMetrics = {
637534
heapUsedBefore: Math.round(heapBefore.usedSize / 1024 / 1024),
638535
heapUsedAfter: Math.round(heapAfter.usedSize / 1024 / 1024),
639536
heapDelta: Math.round((heapAfter.usedSize - heapBefore.usedSize) / 1024 / 1024),
@@ -650,20 +547,15 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
650547
return -1; // gc() not available in this build
651548
}
652549
})(),
653-
majorGCs, minorGCs,
654-
gcDurationMs: Math.round(gcDurationMs * 100) / 100,
655550
layoutCount: getMetric(metricsAfter, 'LayoutCount') - getMetric(metricsBefore, 'LayoutCount'),
656551
recalcStyleCount: getMetric(metricsAfter, 'RecalcStyleCount') - getMetric(metricsBefore, 'RecalcStyleCount'),
657552
forcedReflowCount: getMetric(metricsAfter, 'ForcedStyleRecalcs') - getMetric(metricsBefore, 'ForcedStyleRecalcs'),
658-
longTaskCount,
659553
longAnimationFrameCount: loafData.count,
660554
longAnimationFrameTotalMs: Math.round(loafData.totalMs * 100) / 100,
661555
frameCount: getMetric(metricsAfter, 'FrameCount') - getMetric(metricsBefore, 'FrameCount'),
662556
compositeLayers: getMetric(metricsAfter, 'CompositeLayers') - getMetric(metricsBefore, 'CompositeLayers'),
663557
paintCount: getMetric(metricsAfter, 'PaintCount') - getMetric(metricsBefore, 'PaintCount'),
664-
hasInternalMarks: chatMarks.length > 0,
665558
responseHasContent: responseInfo.hasContent,
666-
internalFirstToken,
667559
profilePath,
668560
tracePath,
669561
snapshotPath,
@@ -681,6 +573,68 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
681573
activeVSCode = null;
682574
await vscode.close();
683575
}
576+
577+
// Read the trace file written by VS Code on exit via --trace-startup-file
578+
/** @type {Array<any>} */
579+
let traceEvents = [];
580+
try {
581+
const traceData = JSON.parse(fs.readFileSync(tracePath, 'utf-8'));
582+
traceEvents = traceData.traceEvents || [];
583+
} catch {
584+
// Trace file may not exist if VS Code crashed before shutdown
585+
}
586+
587+
// Extract code/chat/* perf marks from blink.user_timing trace events.
588+
// These appear as instant ('R' or 'I') events with timestamps in microseconds.
589+
const chatMarks = traceEvents
590+
.filter(e => e.cat === 'blink.user_timing' && e.name && e.name.startsWith('code/chat/'))
591+
.map(e => ({ name: e.name, startTime: e.ts / 1000 }));
592+
593+
if (verbose && chatMarks.length > 0) {
594+
console.log(` [trace] chatMarks (${chatMarks.length}): ${chatMarks.map((/** @type {any} */ m) => m.name.split('/').slice(-1)[0]).join(', ')}`);
595+
}
596+
597+
// Parse timing — prefer internal code/chat/* marks (precise, in-process)
598+
// with client-side Date.now() as fallback for older builds without marks.
599+
const timeToUIUpdated = markDuration(chatMarks, 'request/start', 'request/uiUpdated');
600+
const internalFirstToken = markDuration(chatMarks, 'request/start', 'request/firstToken');
601+
const timeToFirstToken = internalFirstToken >= 0 ? internalFirstToken : (firstResponseTime - submitTime);
602+
const timeToComplete = responseCompleteTime - submitTime;
603+
const instructionCollectionTime = markDuration(chatMarks, 'request/willCollectInstructions', 'request/didCollectInstructions');
604+
const agentInvokeTime = markDuration(chatMarks, 'agent/willInvoke', 'agent/didInvoke');
605+
606+
// Parse GC events from trace.
607+
// Use the trace-event category and phase fields which are stable
608+
// across V8 versions, rather than matching event name substrings.
609+
let majorGCs = 0, minorGCs = 0, gcDurationMs = 0;
610+
for (const event of traceEvents) {
611+
const isGC = event.cat === 'v8.gc'
612+
|| event.cat === 'devtools.timeline,v8'
613+
|| (typeof event.cat === 'string' && event.cat.split(',').some((/** @type {string} */ c) => c.trim() === 'v8.gc'));
614+
if (!isGC) { continue; }
615+
// Only count complete ('X') or duration-begin ('B') events to
616+
// avoid double-counting begin/end pairs.
617+
if (event.ph && event.ph !== 'X' && event.ph !== 'B') { continue; }
618+
const name = event.name || '';
619+
if (/Major|MarkCompact|MSC|MC|IncrementalMarking|FinalizeMC/i.test(name)) { majorGCs++; }
620+
else if (/Minor|Scaveng/i.test(name)) { minorGCs++; }
621+
else { minorGCs++; } // default unknown GC events to minor
622+
if (event.dur) { gcDurationMs += event.dur / 1000; }
623+
}
624+
let longTaskCount = 0;
625+
for (const event of traceEvents) {
626+
if (event.name === 'RunTask' && event.dur && event.dur > 50_000) { longTaskCount++; }
627+
}
628+
629+
return {
630+
...partialMetrics,
631+
timeToUIUpdated, timeToFirstToken, timeToComplete, instructionCollectionTime, agentInvokeTime,
632+
hasInternalMarks: chatMarks.length > 0,
633+
internalFirstToken,
634+
majorGCs, minorGCs,
635+
gcDurationMs: Math.round(gcDurationMs * 100) / 100,
636+
longTaskCount,
637+
};
684638
}
685639

686640
// -- CI summary generation ---------------------------------------------------

0 commit comments

Comments
 (0)