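update: emit leak-check CI summary from test-chat-mem-leaks.js (--ci) and compute LoAF metrics from trace events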

pwang347 · pwang347 · commit e979b6e476ed · 2026-04-15T14:22:07.000-07:00
diff --git a/.github/workflows/chat-perf.yml b/.github/workflows/chat-perf.yml
@@ -324,7 +324,7 @@ jobs:
       - name: Run memory leak check
         id: leak
         run: |
-          LEAK_ARGS="--verbose"
+          LEAK_ARGS="--verbose --ci"
           if [[ -n "$TEST_COMMIT" ]]; then
             LEAK_ARGS="$LEAK_ARGS --build $TEST_COMMIT"
           fi
@@ -345,6 +345,7 @@ jobs:
           path: |
             leak-output.log
             .chat-simulation-data/chat-simulation-leak-results.json
+            .chat-simulation-data/ci-summary-leak.md
           retention-days: 30
 
   # ── Report: collect results, write summary, fail on regression ──────
@@ -390,40 +391,9 @@ jobs:
             echo "⚠️ No summary files generated. Check perf-output.log artifacts." >> "$GITHUB_STEP_SUMMARY"
           fi
 
-          if [[ -f leak-results/.chat-simulation-data/chat-simulation-leak-results.json ]]; then
-            echo "" >> "$GITHUB_STEP_SUMMARY"
-            echo "## Memory Leak Check" >> "$GITHUB_STEP_SUMMARY"
+          if [[ -f leak-results/.chat-simulation-data/ci-summary-leak.md ]]; then
             echo "" >> "$GITHUB_STEP_SUMMARY"
-
-            node -e "
-              const r = JSON.parse(require('fs').readFileSync('leak-results/.chat-simulation-data/chat-simulation-leak-results.json', 'utf-8'));
-              const threshold = r.leakThresholdMB || 10;
-              const leaked = r.totalResidualMB > threshold;
-              const verdict = leaked ? '❌ **LEAK DETECTED**' : '✅ **No leak detected**';
-              const lines = [];
-              lines.push('| | |');
-              lines.push('|---|---|');
-              lines.push('| **Verdict** | ' + verdict + ' |');
-              lines.push('| **Threshold** | ' + threshold + ' MB |');
-              lines.push('| **Iterations** | ' + (r.iterationCount || r.iterations.length) + ' (+ 1 warmup) |');
-              lines.push('| **Scenarios per iteration** | ' + (r.scenarioCount || '—') + ' |');
-              lines.push('');
-              lines.push('| Phase | Heap (MB) | DOM Nodes |');
-              lines.push('|-------|----------:|----------:|');
-              lines.push('| Baseline (post-warmup) | ' + r.baseline.heapMB + ' | ' + r.baseline.domNodes + ' |');
-              for (let i = 0; i < r.iterations.length; i++) {
-                const it = r.iterations[i];
-                const sign = it.deltaHeapMB > 0 ? '+' : '';
-                const domSign = it.deltaDomNodes > 0 ? '+' : '';
-                lines.push('| Iteration ' + (i + 1) + ' | ' + it.afterHeapMB + ' (' + sign + it.deltaHeapMB + ') | ' + it.afterDomNodes + ' (' + domSign + it.deltaDomNodes + ') |');
-              }
-              lines.push('| **Final** | **' + r.final.heapMB + '** | **' + r.final.domNodes + '** |');
-              lines.push('');
-              const sign = r.totalResidualMB > 0 ? '+' : '';
-              const domSign = r.totalResidualNodes > 0 ? '+' : '';
-              lines.push('**Total residual growth:** ' + sign + r.totalResidualMB + ' MB heap, ' + domSign + r.totalResidualNodes + ' DOM nodes');
-              console.log(lines.join('\n'));
-            " >> "$GITHUB_STEP_SUMMARY"
+            cat leak-results/.chat-simulation-data/ci-summary-leak.md >> "$GITHUB_STEP_SUMMARY"
           fi
 
       - name: Zip diagnostic outputs
diff --git a/scripts/chat-simulation/test-chat-mem-leaks.js b/scripts/chat-simulation/test-chat-mem-leaks.js
@@ -55,6 +55,7 @@ function parseArgs() {
 		iterations: CONFIG.iterations ?? 3,
 		messages: CONFIG.messages ?? 5,
 		verbose: false,
+		ci: false,
 		/** @type {string | undefined} */
 		build: undefined,
 		leakThresholdMB: CONFIG.leakThresholdMB ?? 5,
@@ -64,6 +65,7 @@ function parseArgs() {
 			case '--iterations': opts.iterations = parseInt(args[++i], 10); break;
 			case '--messages': case '-n': opts.messages = parseInt(args[++i], 10); break;
 			case '--verbose': opts.verbose = true; break;
+			case '--ci': opts.ci = true; break;
 			case '--build': case '-b': opts.build = args[++i]; break;
 			case '--threshold': opts.leakThresholdMB = parseFloat(args[++i]); break;
 			case '--help': case '-h':
@@ -73,6 +75,7 @@ function parseArgs() {
 					'Options:',
 					'  --iterations <n>    Number of open→work→reset cycles (default: 3)',
 					'  --messages <n>      Messages to send per iteration (default: 5)',
+					'  --ci                CI mode: write Markdown summary to ci-summary-leak.md',
 					'  --build <path|ver>  Path to VS Code build or version to download',
 					'  --threshold <MB>    Max total residual heap growth in MB (default: 5)',
 					'  --verbose           Print per-step details',
@@ -413,8 +416,51 @@ async function main() {
 		console.log(`[chat-simulation] No leak detected (${result.totalResidualMB}MB residual < ${opts.leakThresholdMB}MB threshold)`);
 	}
 
+	if (opts.ci) {
+		const summary = generateLeakCISummary(result, opts);
+		const summaryPath = path.join(DATA_DIR, 'ci-summary-leak.md');
+		fs.writeFileSync(summaryPath, summary);
+		console.log(`[chat-simulation] CI summary written to ${summaryPath}`);
+	}
+
 	await mockServer.close();
 	process.exit(leaked ? 1 : 0);
 }
 
+/**
+ * Generate a Markdown summary for CI, matching the perf script pattern. Returns the whole report as one newline-joined string: heading, verdict/threshold table, per-phase heap and DOM-node table, and the total residual growth line.
+ * @param {{ baseline: { heapMB: number, domNodes: number }, final: { heapMB: number, domNodes: number }, totalResidualMB: number, totalResidualNodes: number, iterations: { beforeHeapMB: number, afterHeapMB: number, deltaHeapMB: number, beforeDomNodes: number, afterDomNodes: number, deltaDomNodes: number }[] }} result - Measurements from the leak run; the verdict is derived from totalResidualMB.
+ * @param {{ leakThresholdMB: number, iterations: number }} opts - Supplies the threshold (MB) for the verdict and the iteration count shown in the table.
+ */
+function generateLeakCISummary(result, opts) {
+	const leaked = result.totalResidualMB > opts.leakThresholdMB; // strict '>': residual equal to the threshold is not reported as a leak
+	const verdict = leaked ? '\u274C **LEAK DETECTED**' : '\u2705 **No leak detected**'; // \u274C is the cross-mark emoji, \u2705 the check-mark emoji
+	const lines = [];
+	lines.push('## Memory Leak Check');
+	lines.push('');
+	lines.push('| | |'); // blank header cells: a two-column key/value table
+	lines.push('|---|---|');
+	lines.push(`| **Verdict** | ${verdict} |`);
+	lines.push(`| **Threshold** | ${opts.leakThresholdMB} MB |`);
+	lines.push(`| **Iterations** | ${opts.iterations} (+ 1 warmup) |`); // configured count from opts, not result.iterations.length
+	lines.push(`| **Scenarios per iteration** | ${getScenarioIds().length} |`); // getScenarioIds() comes from outside this function
+	lines.push('');
+	lines.push('| Phase | Heap (MB) | DOM Nodes |');
+	lines.push('|-------|----------:|----------:|');
+	lines.push(`| Baseline (post-warmup) | ${result.baseline.heapMB} | ${result.baseline.domNodes} |`);
+	for (let i = 0; i < result.iterations.length; i++) {
+		const it = result.iterations[i];
+		const sign = it.deltaHeapMB > 0 ? '+' : ''; // negative deltas already print their own '-', so only positives get a sign
+		const domSign = it.deltaDomNodes > 0 ? '+' : '';
+		lines.push(`| Iteration ${i + 1} | ${it.afterHeapMB} (${sign}${it.deltaHeapMB}) | ${it.afterDomNodes} (${domSign}${it.deltaDomNodes}) |`);
+	}
+	lines.push(`| **Final** | **${result.final.heapMB}** | **${result.final.domNodes}** |`);
+	lines.push('');
+	const sign = result.totalResidualMB > 0 ? '+' : ''; // same sign rule, applied to the run totals
+	const domSign = result.totalResidualNodes > 0 ? '+' : '';
+	lines.push(`**Total residual growth:** ${sign}${result.totalResidualMB} MB heap, ${domSign}${result.totalResidualNodes} DOM nodes`);
+	lines.push(''); // empty last entry so the joined text ends with a newline
+	return lines.join('\n');
+}
+
 main().catch(err => { console.error(err); process.exit(1); });
diff --git a/scripts/chat-simulation/test-chat-perf-regression.js b/scripts/chat-simulation/test-chat-perf-regression.js
@@ -255,7 +255,7 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
 	let extHostInspector = null;
 	/** @type {{ usedSize: number, totalSize: number } | null} */
 	let extHostHeapBefore = null;
-	/** @type {Omit<RunMetrics, 'majorGCs' | 'minorGCs' | 'gcDurationMs' | 'longTaskCount' | 'timeToUIUpdated' | 'timeToFirstToken' | 'timeToComplete' | 'instructionCollectionTime' | 'agentInvokeTime' | 'hasInternalMarks' | 'internalFirstToken'> | null} */
+	/** @type {Omit<RunMetrics, 'majorGCs' | 'minorGCs' | 'gcDurationMs' | 'longTaskCount' | 'longAnimationFrameCount' | 'longAnimationFrameTotalMs' | 'timeToUIUpdated' | 'timeToFirstToken' | 'timeToComplete' | 'instructionCollectionTime' | 'agentInvokeTime' | 'hasInternalMarks' | 'internalFirstToken'> | null} */
 	let partialMetrics = null;
 	// Timing vars hoisted for access in post-close trace parsing
 	let submitTime = 0;
@@ -365,26 +365,6 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
 		await cdp.send('Profiler.enable');
 		await cdp.send('Profiler.start');
 
-		// Install a PerformanceObserver for Long Animation Frames (LoAF)
-		// to capture frame-level jank that longTaskCount alone misses.
-		await window.evaluate(() => {
-			// @ts-ignore
-			globalThis._chatLoAFEntries = [];
-			try {
-				// @ts-ignore
-				globalThis._chatLoAFObserver = new PerformanceObserver((list) => {
-					for (const entry of list.getEntries()) {
-						// @ts-ignore
-						globalThis._chatLoAFEntries.push({ duration: entry.duration, startTime: entry.startTime });
-					}
-				});
-				// @ts-ignore
-				globalThis._chatLoAFObserver.observe({ type: 'long-animation-frame', buffered: false });
-			} catch {
-				// long-animation-frame not supported in this build — metrics will be 0
-			}
-		});
-
 		// Submit
 		const completionsBefore = mockServer.completionCount();
 		submitTime = Date.now();
@@ -505,21 +485,6 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
 			console.log(`  [debug] Client-side timing: firstResponse=${firstResponseTime - submitTime}ms, complete=${responseCompleteTime - submitTime}ms`);
 		}
 
-		// Collect Long Animation Frame entries and tear down the observer
-		const loafData = await window.evaluate(() => {
-			// @ts-ignore
-			if (globalThis._chatLoAFObserver) { globalThis._chatLoAFObserver.disconnect(); }
-			// @ts-ignore
-			const entries = globalThis._chatLoAFEntries ?? [];
-			// @ts-ignore
-			delete globalThis._chatLoAFEntries;
-			// @ts-ignore
-			delete globalThis._chatLoAFObserver;
-			const count = entries.length;
-			const totalMs = entries.reduce((/** @type {number} */ sum, /** @type {any} */ e) => sum + e.duration, 0);
-			return { count, totalMs };
-		});
-
 		const heapAfter = /** @type {any} */ (await cdp.send('Runtime.getHeapUsage'));
 		const metricsAfter = await cdp.send('Performance.getMetrics');
 
@@ -617,8 +582,6 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
 			layoutCount: getMetric(metricsAfter, 'LayoutCount') - getMetric(metricsBefore, 'LayoutCount'),
 			recalcStyleCount: getMetric(metricsAfter, 'RecalcStyleCount') - getMetric(metricsBefore, 'RecalcStyleCount'),
 			forcedReflowCount: getMetric(metricsAfter, 'ForcedStyleRecalcs') - getMetric(metricsBefore, 'ForcedStyleRecalcs'),
-			longAnimationFrameCount: loafData.count,
-			longAnimationFrameTotalMs: Math.round(loafData.totalMs * 100) / 100,
 			frameCount: getMetric(metricsAfter, 'FrameCount') - getMetric(metricsBefore, 'FrameCount'),
 			compositeLayers: getMetric(metricsAfter, 'CompositeLayers') - getMetric(metricsBefore, 'CompositeLayers'),
 			paintCount: getMetric(metricsAfter, 'PaintCount') - getMetric(metricsBefore, 'PaintCount'),
@@ -693,6 +656,30 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
 		if (event.name === 'RunTask' && event.dur && event.dur > 50_000) { longTaskCount++; }
 	}
 
+	// Parse Long Animation Frame (LoAF) events from devtools.timeline trace.
+	// AnimationFrame events use async flow pairs (ph:'s' start, ph:'f' finish)
+	// with matching ids. Compute duration from each s→f pair.
+	let longAnimationFrameCount = 0;
+	let longAnimationFrameTotalMs = 0;
+	{
+		/** @type {Map<number, number>} */
+		const frameStarts = new Map();
+		for (const event of traceEvents) {
+			if (event.cat === 'devtools.timeline' && event.name === 'AnimationFrame') {
+				if (event.ph === 's') {
+					frameStarts.set(event.id, event.ts);
+				} else if (event.ph === 'f' && frameStarts.has(event.id)) {
+					const durationMs = (event.ts - frameStarts.get(event.id)) / 1000;
+					frameStarts.delete(event.id);
+					if (durationMs > 50) {
+						longAnimationFrameCount++;
+						longAnimationFrameTotalMs += durationMs;
+					}
+				}
+			}
+		}
+	}
+
 	return {
 		...partialMetrics,
 		timeToUIUpdated, timeToFirstToken, timeToComplete, instructionCollectionTime, agentInvokeTime,
@@ -701,6 +688,8 @@ async function runOnce(electronPath, scenario, mockServer, verbose, runIndex, ru
 		majorGCs, minorGCs,
 		gcDurationMs: Math.round(gcDurationMs * 100) / 100,
 		longTaskCount,
+		longAnimationFrameCount,
+		longAnimationFrameTotalMs: Math.round(longAnimationFrameTotalMs * 100) / 100,
 	};
 }
 
@@ -970,6 +959,7 @@ function generateCISummary(jsonReport, baseline, opts) {
 			else if (v.verdict === 'improved') { verdictDisplay = '\u2B06\uFE0F improved'; }
 			else if (v.verdict === 'ok') { verdictDisplay = '\u2705 ok'; }
 			else if (v.verdict === 'noise') { verdictDisplay = '\uD83C\uDF2B\uFE0F noise'; }
+			else if (v.verdict === 'info') { verdictDisplay = '\u2139\uFE0F'; }
 			lines.push(`| ${v.metric} | ${v.basStr} | ${v.curStr} | ${pct} | ${v.pValue} | ${verdictDisplay} |`);
 		}
 		lines.push('');