From 5deffad68ce61c37f633f0f27c6edef52fa554f8 Mon Sep 17 00:00:00 2001 From: Christian Murphy Date: Sun, 3 May 2026 08:13:12 -0700 Subject: [PATCH] perf(compile): build dispatch handler context as a stable-shape literal The compile loop built each handler's this-binding with Object.assign({sliceSerialize: ...}, context). Object.assign goes through a generic copy-loop over the source's enumerable properties; V8 cannot give that loop a stable hidden class for the merged object because the source's shape is a runtime variable. Each event paid the cost of a copy-iteration plus a hidden-class transition. The fix replaces the Object.assign with an inline object literal that lists every field explicitly. The literal has a fixed compile-time shape so V8 keeps a single hidden class for the merged object across every event, allocates it through a fast path, and can inline the construction at the call site. The merged object is still allocated per event, so the prior contract (handlers may capture `this` or reassign top-level fields without leaking into subsequent events) is preserved. The dispatch is otherwise unchanged. Inputs that benefit, with multi-run median-of-medians vs the baseline (spread in parentheses): 10,000 short backtick code spans -36.1% (5.3%) 1,000 inline links in a paragraph -31.0% (2.0%) 10,000 character entity references -29.9% (2.2%) one CommonMark example -13.8% (5.5%) CommonMark spec * 35 (~564 KB) -10.6% (3.6%) CommonMark spec * 7 (~113 KB) -7.8% (18.3%) full CommonMark spec (~16 KB) -7.4% (43.5%, NOISY) Every input that drives a non-trivial number of events through the dispatch loop benefits, because Object.assign was the cost being eliminated. Inputs with high node-count-per-byte (many small inline tokens) gain the most. Trade-offs and inputs where the gain is small or absent: A 10,000-unmatched-asterisks input and a 256 KB Unicode-heavy input both moved within +/- 1% of baseline. 
Their event count per byte is low, so the per-event Object.assign was not a hotspot. The pure emphasis stress inputs ('a**b' repeated 10,000 times and similar) reported large deltas in single runs but their cross-run spread is 44 to 53% on the baseline alone. The input shape (mostly attentionSequence events that mostly do not match a handler) means the per-event merge was not the cost driver. The deltas there are noise. Tests pass: dev + prod 1448/1448, 100% coverage maintained, mdast-util-gfm 54/54, mdast-util-mdx 11/13. The two failing mdx tests reproduce on upstream/main and are not introduced by this branch. --- dev/lib/index.js | 34 +++++++++++++++++++++++++--------- dev/lib/types.d.ts | 9 +++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/dev/lib/index.js b/dev/lib/index.js index 51b7144..fa07098 100644 --- a/dev/lib/index.js +++ b/dev/lib/index.js @@ -247,16 +247,32 @@ function compiler(options) { index = -1 + // The handler this-binding receives the same fields as `context` plus a + // per-event `sliceSerialize`. Build the merged object as a stable-shape + // literal so V8 keeps a single hidden class for it and can inline the + // construction; the previous Object.assign({sliceSerialize: ...}, context) + // form created the same object but went through a copy-loop and gave V8 + // less to optimize. A fresh object per event preserves the previous + // contract that handlers may capture `this` or reassign top-level fields + // without leaking into subsequent events. 
while (++index < events.length) { - const handler = config[events[index][0]] - - if (own.call(handler, events[index][1].type)) { - handler[events[index][1].type].call( - Object.assign( - {sliceSerialize: events[index][2].sliceSerialize}, - context - ), - events[index][1] + const event = events[index] + const handler = config[event[0]] + + if (own.call(handler, event[1].type)) { + handler[event[1].type].call( + { + sliceSerialize: event[2].sliceSerialize, + stack: context.stack, + tokenStack: context.tokenStack, + config: context.config, + enter: context.enter, + exit: context.exit, + buffer: context.buffer, + resume: context.resume, + data: context.data + }, + event[1] ) } } diff --git a/dev/lib/types.d.ts b/dev/lib/types.d.ts index 710de76..43ce11f 100644 --- a/dev/lib/types.d.ts +++ b/dev/lib/types.d.ts @@ -3,6 +3,15 @@ import type {ParseOptions, Token} from 'micromark-util-types' /** * Compiler context. + * + * The object passed to handlers as `this` is scoped to a single handler + * call. A fresh object is constructed for every event in the dispatch loop, + * so capturing `this` or reassigning one of its top-level fields in a handler + * has no effect on the `this` that subsequent events receive. + * Cross-event state belongs in `this.data`, which is the shared key/value + * store described below; mutations through `this.stack`, `this.tokenStack`, + * and the `enter`/`exit`/`buffer`/`resume` helpers go to the same shared + * underlying objects and remain visible across events as before. */ export interface CompileContext { /**