Skip to content

Commit b63fb40

Browse files
authored
perf: hybrid hot-path evaluator — up to 40% faster dispatch (#785)
## Summary

- Profile all 66 benchmark files across 5 suites to identify ExprTag visit frequencies. **Top 7 types cover 96.1%** of all `visitExpr` calls: ValidId (30%), BinaryOp (21%), Val.Literal (18%), Select (13%), Apply1 (5%), ObjExtend (4%), IfElse (4%).
- Split `NewEvaluator.visitExpr` into a **hot path** (~120 bytecodes, 7 `instanceof` checks) and a **cold path** (`private visitExprCold` using `tag + @switch` for remaining 30 types).
- The hot path fits within JIT `FreqInlineSize=325` bytecodes, enabling **C2 to inline `visitExpr` into callers** (`visitBinaryOp`, `visitSelect`, etc.). The old evaluator's ~700-bytecode method body never gets inlined.
- Add `--new-evaluator` CLI flag for A/B testing.
- Add `EvaluatorBenchmark` (JMH) and `ExprTagProfile` profiling tool.

## JMH Results

Steady-state performance (1 fork, 8 warmup, 10 measurement iterations):

| Benchmark | Old (ms) | New (ms) | Delta |
|-----------|----------|----------|-------|
| bench.01 | 0.026 | 0.018 | **-31%** |
| bench.02 | 32.58 | 25.73 | **-21%** |
| bench.03 | 9.39 | 5.64 | **-40%** |
| gen_big_object | 0.928 | 0.715 | **-23%** |
| string_render_perf | 0.768 | 0.496 | **-35%** |
| base64_mega | 3.462 | 3.106 | **-10%** |
| realistic1 | 1.850 | 1.764 | **-5%** |
| heavy_string_render | 34.80 | 33.09 | **-5%** |
| realistic2 | 47.32 | 47.78 | ~tied |
| bench.04, 06, 08, 09 | - | - | ~tied |

Evaluator-heavy benchmarks (bench.01–03, gen_big_object, string_render_perf) show **21–40% improvement**. Builtin-dominated benchmarks (bench.04, foldl, comparison) are unaffected — the evaluator dispatch is not their bottleneck.

## Why it works

The old evaluator's `visitExpr` compiles to a ~700-bytecode `instanceof` chain. This exceeds JIT's `FreqInlineSize=325`, so **C2 never inlines it** into callers. Every recursive `visitExpr` call from within `visitBinaryOp`, `visitSelect`, etc. pays full virtual dispatch overhead.
The hybrid approach splits into:

- **Hot path** (~120 bytecodes): 7 `instanceof` checks for 96% of calls — small enough for C2 to inline
- **Cold path** (separate method): `tag + @switch` tableswitch for the remaining 4% — O(1) dispatch instead of scanning 30+ `instanceof` checks

## ExprTag frequency data (global across all 66 benchmark files)

```
Rank  ExprTag                 Count     Pct  Cumulative
1     ValidId             3,435,607   29.9%       29.9%
2     BinaryOp            2,455,182   21.4%       51.3%
3     Val.Literal         2,099,413   18.3%       69.5%
4     Select              1,464,561   12.7%       82.3%
5     Apply1                619,927    5.4%       87.7%
6     ObjExtend             485,621    4.2%       91.9%
7     IfElse                485,570    4.2%       96.1%
8     ObjBody.MemberList    250,734    2.2%       98.3%
9     ApplyBuiltin1         132,666    1.2%       99.4%
10+   (remaining)            63,212    0.6%      100.0%
```

## Test plan

- [x] `./mill 'sjsonnet.jvm[3.3.7]'.test` — all JVM tests pass (both old and new evaluator)
- [x] `./mill __.reformat` — scalafmt clean
- [x] JMH A/B benchmarks across cpp_suite, go_suite, bug_suite, sjsonnet_suite
- [x] ExprTagProfile across all 66 benchmark files
1 parent c547cf8 commit b63fb40

5 files changed

Lines changed: 289 additions & 11 deletions

File tree

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package sjsonnet.bench
2+
3+
import org.openjdk.jmh.annotations.*
4+
import org.openjdk.jmh.infra.*
5+
import sjsonnet.*
6+
7+
import java.io.{ByteArrayOutputStream, OutputStream, PrintStream, StringWriter}
8+
import java.util.concurrent.TimeUnit
9+
10+
/**
 * JMH A/B harness that times the same jsonnet file under both evaluators.
 *
 * Each invocation builds a fresh `Interpreter` (with its own `DefaultParseCache`) and runs
 * `interpret0` on the benchmark file, rendering the result to a string. The two benchmark methods
 * use identical `Settings` except for `useNewEvaluator`, so any timing difference comes from that
 * switch alone.
 */
@BenchmarkMode(Array(Mode.AverageTime))
@Fork(2)
@Threads(1)
@Warmup(iterations = 15)
@Measurement(iterations = 20)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
class EvaluatorBenchmark {

  /** Benchmark input, relative to the workspace root; injected by JMH for each parameter value. */
  @Param(
    Array(
      // cpp_suite — C++ jsonnet benchmarks
      "bench/resources/cpp_suite/bench.01.jsonnet",
      "bench/resources/cpp_suite/bench.02.jsonnet",
      "bench/resources/cpp_suite/bench.03.jsonnet",
      "bench/resources/cpp_suite/bench.04.jsonnet",
      "bench/resources/cpp_suite/bench.06.jsonnet",
      "bench/resources/cpp_suite/bench.08.jsonnet",
      "bench/resources/cpp_suite/bench.09.jsonnet",
      "bench/resources/cpp_suite/gen_big_object.jsonnet",
      "bench/resources/cpp_suite/heavy_string_render.jsonnet",
      "bench/resources/cpp_suite/large_string_join.jsonnet",
      "bench/resources/cpp_suite/realistic1.jsonnet",
      "bench/resources/cpp_suite/realistic2.jsonnet",
      "bench/resources/cpp_suite/string_render_perf.jsonnet",
      // go_suite — Go jsonnet builtins
      "bench/resources/go_suite/base64_heavy.jsonnet",
      "bench/resources/go_suite/base64_mega.jsonnet",
      "bench/resources/go_suite/comparison.jsonnet",
      "bench/resources/go_suite/comparison2.jsonnet",
      "bench/resources/go_suite/foldl.jsonnet",
      "bench/resources/go_suite/reverse.jsonnet",
      "bench/resources/go_suite/substr.jsonnet",
      // bug_suite
      "bench/resources/bug_suite/assertions.jsonnet",
      // sjsonnet_suite
      "bench/resources/sjsonnet_suite/setDiff.jsonnet"
    )
  )
  var path: String = _

  // Populated once per trial by `setup()`.
  private var wd: os.Path = _
  private var filePath: OsPath = _
  private var fileContent: String = _
  private var jpaths: Seq[OsPath] = _

  /** Resolves the workspace root and loads the benchmark file into memory once per trial. */
  @Setup(Level.Trial)
  def setup(): Unit = {
    wd = sys.env.get("MILL_WORKSPACE_ROOT") match {
      case Some(root) => os.Path(root)
      case None       => os.pwd
    }
    val target = wd / os.RelPath(path)
    filePath = OsPath(target)
    fileContent = os.read(target)
    jpaths = Seq(OsPath(wd))
  }

  /**
   * Interprets the loaded file with a brand-new interpreter and returns the rendered output.
   *
   * @param useNew
   *   value passed to `Settings.useNewEvaluator`
   * @throws RuntimeException
   *   if `interpret0` reports a failure
   */
  private def run(useNew: Boolean): String = {
    val out = new StringWriter
    val interpreter = new Interpreter(
      Map.empty[String, String],
      Map.empty[String, String],
      OsPath(wd),
      importer = new SjsonnetMainBase.SimpleImporter(jpaths, None),
      parseCache = new DefaultParseCache,
      settings = new Settings(useNewEvaluator = useNew, maxStack = 100000)
    )
    interpreter
      .interpret0(fileContent, filePath, new Renderer(out, indent = 3))
      .fold(err => throw new RuntimeException(err), _ => out.toString)
  }

  @Benchmark
  def oldEvaluator(bh: Blackhole): Unit =
    bh.consume(run(useNew = false))

  @Benchmark
  def newEvaluator(bh: Blackhole): Unit =
    bh.consume(run(useNew = true))
}
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
package sjsonnet.bench
2+
3+
import sjsonnet.*
4+
5+
import java.io.StringWriter
6+
7+
/**
 * Profile the frequency of each ExprTag in visitExpr calls across benchmark workloads. Run with:
 * ./mill bench.runMain sjsonnet.bench.ExprTagProfile [file1.jsonnet file2.jsonnet ...]
 *
 * If no files are given, profiles ALL .jsonnet files under bench/resources/. File arguments may be
 * absolute or relative to the workspace root.
 */
object ExprTagProfile {

  // Display names indexed by tag value.
  // NOTE(review): this table is maintained by hand and must mirror ExprTags — confirm when tags change.
  private val tagNames = Array(
    "UNTAGGED", // 0
    "ValidId", // 1
    "BinaryOp", // 2
    "Select", // 3
    "Val.Literal", // 4
    "Val.Func", // 5
    "ApplyBuiltin0", // 6
    "ApplyBuiltin1", // 7
    "ApplyBuiltin2", // 8
    "ApplyBuiltin3", // 9
    "ApplyBuiltin4", // 10
    "And", // 11
    "Or", // 12
    "UnaryOp", // 13
    "Apply1", // 14
    "Lookup", // 15
    "Function", // 16
    "LocalExpr", // 17
    "Apply", // 18
    "IfElse", // 19
    "Apply3", // 20
    "ObjBody.MemberList", // 21
    "Apply2", // 22
    "AssertExpr", // 23
    "ApplyBuiltin", // 24
    "Comp", // 25
    "Arr", // 26
    "SelectSuper", // 27
    "LookupSuper", // 28
    "InSuper", // 29
    "ObjExtend", // 30
    "ObjBody.ObjComp", // 31
    "Slice", // 32
    "Import", // 33
    "Apply0", // 34
    "ImportStr", // 35
    "ImportBin", // 36
    "Error" // 37
  )

  // The profiling hook masks the tag with 0xff, so 256 slots cover every possible value.
  // A smaller table would silently drop visits for high tags and understate the totals.
  private val TagSlots = 256

  /**
   * Evaluates each requested file with a counting evaluator and prints a per-file summary followed
   * by a global ranking of tag frequencies.
   *
   * @param args
   *   optional list of jsonnet files (absolute, or relative to the workspace root); when empty,
   *   every `.jsonnet` file under `bench/resources` is profiled
   */
  def main(args: Array[String]): Unit = {
    val wd = sys.env.get("MILL_WORKSPACE_ROOT").map(os.Path(_)).getOrElse(os.pwd)
    val benchRoot = wd / "bench" / "resources"

    val files =
      // Resolve against wd first so absolute paths are accepted as well as relative ones.
      if (args.nonEmpty) args.toSeq.map(a => os.Path(a, wd).relativeTo(wd))
      else
        os.walk(benchRoot)
          .filter(_.ext == "jsonnet")
          .map(_.relativeTo(wd))
          .sorted

    val globalCounts = new Array[Long](TagSlots)
    var globalTotal = 0L
    val perFile = scala.collection.mutable.ArrayBuffer[(String, Long, Array[Long])]()

    for (rel <- files) {
      val counts = new Array[Long](TagSlots)
      val filePath = OsPath(wd / rel)
      val content =
        try os.read(wd / rel)
        catch { case _: Exception => System.err.println(s"SKIP (read error): $rel"); "" }
      if (content.nonEmpty) {
        val ok =
          try {
            val interp = new Interpreter(
              Map.empty[String, String],
              Map.empty[String, String],
              OsPath(wd),
              importer = new SjsonnetMainBase.SimpleImporter(
                Seq(OsPath(wd), OsPath(wd / "bench"), OsPath(wd / "bench" / "resources")),
                None
              ),
              parseCache = new DefaultParseCache,
              settings = new Settings(maxStack = 100000)
            ) {
              // Swap in an evaluator that bumps the per-tag counter before delegating.
              override def createEvaluator(
                  resolver: CachedResolver,
                  extVars: String => Option[Expr],
                  wd: Path,
                  settings: Settings): Evaluator =
                new Evaluator(resolver, extVars, wd, settings) {
                  override def visitExpr(e: Expr)(implicit scope: ValScope): Val = {
                    val t = e.tag & 0xff
                    if (t < counts.length) counts(t) += 1
                    super.visitExpr(e)
                  }
                }
            }
            val writer = new StringWriter
            val renderer = new Renderer(writer, indent = 3)
            interp.interpret0(content, filePath, renderer) match {
              case Right(_) => true
              case Left(e) =>
                System.err.println(s"ERROR: $rel: $e")
                false
            }
          } catch {
            case _: StackOverflowError =>
              System.err.println(s"SKIP (StackOverflow): $rel")
              false
            case e: Exception =>
              System.err.println(s"SKIP (${e.getClass.getSimpleName}): $rel")
              false
          }

        // Only files that evaluated successfully contribute to the aggregate.
        val total = counts.sum
        if (ok && total > 0) {
          perFile += ((rel.toString, total, counts.clone()))
          var i = 0
          while (i < counts.length) {
            globalCounts(i) += counts(i)
            i += 1
          }
          globalTotal += total
        }
      }
    }

    // Per-file summary
    println("\n" + "=" * 100)
    println("PER-FILE SUMMARY")
    println("=" * 100)
    for ((file, total, counts) <- perFile.sortBy(-_._2)) {
      val sorted = counts.zipWithIndex.filter(_._1 > 0).sortBy(-_._1)
      val top3 = sorted
        .take(3)
        .map { case (c, idx) =>
          val name = if (idx < tagNames.length) tagNames(idx) else s"tag=$idx"
          f"$name(${c * 100.0 / total}%.0f%%)"
        }
        .mkString(", ")
      println(f" $file%-65s total=$total%10d top3: $top3")
    }

    // Global aggregation
    println("\n" + "=" * 100)
    println(f"GLOBAL AGGREGATE (${perFile.size} files, $globalTotal%,d total visitExpr calls)")
    println("=" * 100)
    val globalSorted = globalCounts.zipWithIndex.filter(_._1 > 0).sortBy(-_._1)
    // Accumulate integer counts and derive the percentage per row, so rounding error does not
    // build up and the final row is exactly 100.0%.
    var cumCount = 0L
    println(f" ${"Rank"}%-5s ${"ExprTag"}%-20s ${"Count"}%12s ${"Pct"}%7s ${"Cumulative"}%10s")
    println(" " + "-" * 60)
    for (((count, idx), rank) <- globalSorted.zipWithIndex) {
      val name = if (idx < tagNames.length) tagNames(idx) else s"tag=$idx"
      val pct = count * 100.0 / globalTotal
      cumCount += count
      val cumPct = cumCount * 100.0 / globalTotal
      println(f" ${rank + 1}%-5d $name%-20s $count%,12d $pct%6.1f%% $cumPct%9.1f%%")
    }
  }
}

sjsonnet/src-jvm-native/sjsonnet/Config.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,11 @@ final case class Config(
173173
"Profile evaluation and write results to a file. Format: --profile <file> or --profile <format>:<file> where format is 'text' (default) or 'flamegraph'"
174174
)
175175
profile: Option[String] = None,
176+
@arg(
177+
name = "new-evaluator",
178+
doc = "Use the new tag-based evaluator (hybrid instanceof + tableswitch dispatch)"
179+
)
180+
newEvaluator: Flag = Flag(),
176181
@arg(
177182
doc = "The jsonnet file you wish to evaluate",
178183
positional = true

sjsonnet/src-jvm-native/sjsonnet/SjsonnetMainBase.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ object SjsonnetMainBase {
189189
throwErrorForInvalidSets = config.throwErrorForInvalidSets.value,
190190
maxParserRecursionDepth = config.maxParserRecursionDepth,
191191
brokenAssertionLogic = config.brokenAssertionLogic.value,
192+
useNewEvaluator = config.newEvaluator.value,
192193
maxStack = config.maxStack
193194
),
194195
parseCache,

sjsonnet/src/sjsonnet/Evaluator.scala

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2066,13 +2066,24 @@ class NewEvaluator(
20662066
fc: FormatCache = FormatCache.SharedDefault)
20672067
extends Evaluator(r, e, w, s, wa, ds, fc) {
20682068

2069+
// Hot path: top 7 types cover 96.1% of all visitExpr calls across benchmarks.
2070+
// ~120 bytes bytecode — within JIT FreqInlineSize=325, unlike the old evaluator's ~700 bytes.
2071+
// Order matches old evaluator's first 4 types (ValidId, BinaryOp, Select, Val) for C1 parity.
20692072
override def visitExpr(e: Expr)(implicit scope: ValScope): Val = try {
2073+
if (e.isInstanceOf[ValidId]) visitValidId(e.asInstanceOf[ValidId])
2074+
else if (e.isInstanceOf[BinaryOp]) visitBinaryOp(e.asInstanceOf[BinaryOp])
2075+
else if (e.isInstanceOf[Select]) visitSelect(e.asInstanceOf[Select])
2076+
else if (e.isInstanceOf[Val]) e.asInstanceOf[Val] // any Val is returned unchanged, with no visit call
2077+
else if (e.isInstanceOf[Apply1]) visitApply1(e.asInstanceOf[Apply1])
2078+
else if (e.isInstanceOf[ObjExtend]) visitObjExtend(e.asInstanceOf[ObjExtend])
2079+
else if (e.isInstanceOf[IfElse]) visitIfElse(e.asInstanceOf[IfElse])
2080+
else visitExprCold(e) // no hot-path type matched: delegate to the tag-based @switch dispatch
2081+
} catch {
2082+
Error.withStackFrame(e) // catch handler; NOTE(review): presumably attaches e's position to errors — confirm
2083+
}
2084+
2085+
private def visitExprCold(e: Expr)(implicit scope: ValScope): Val =
20702086
(e.tag: @switch) match {
2071-
case ExprTags.ValidId => visitValidId(e.asInstanceOf[ValidId])
2072-
case ExprTags.BinaryOp => visitBinaryOp(e.asInstanceOf[BinaryOp])
2073-
case ExprTags.Select => visitSelect(e.asInstanceOf[Select])
2074-
case ExprTags.`Val.Func` => e.asInstanceOf[Val.Func]
2075-
case ExprTags.`Val.Literal` => e.asInstanceOf[Val.Literal]
20762087
case ExprTags.ApplyBuiltin0 => visitApplyBuiltin0(e.asInstanceOf[ApplyBuiltin0])
20772088
case ExprTags.ApplyBuiltin1 => visitApplyBuiltin1(e.asInstanceOf[ApplyBuiltin1])
20782089
case ExprTags.ApplyBuiltin2 => visitApplyBuiltin2(e.asInstanceOf[ApplyBuiltin2])
@@ -2081,14 +2092,12 @@ class NewEvaluator(
20812092
case ExprTags.And => visitAnd(e.asInstanceOf[And])
20822093
case ExprTags.Or => visitOr(e.asInstanceOf[Or])
20832094
case ExprTags.UnaryOp => visitUnaryOp(e.asInstanceOf[UnaryOp])
2084-
case ExprTags.Apply1 => visitApply1(e.asInstanceOf[Apply1])
20852095
case ExprTags.Lookup => visitLookup(e.asInstanceOf[Lookup])
20862096
case ExprTags.Function =>
20872097
val f = e.asInstanceOf[Function]
20882098
visitMethod(f.body, f.params, f.pos)
20892099
case ExprTags.LocalExpr => visitLocalExpr(e.asInstanceOf[LocalExpr])
20902100
case ExprTags.Apply => visitApply(e.asInstanceOf[Apply])
2091-
case ExprTags.IfElse => visitIfElse(e.asInstanceOf[IfElse])
20922101
case ExprTags.Apply3 => visitApply3(e.asInstanceOf[Apply3])
20932102
case ExprTags.`ObjBody.MemberList` =>
20942103
val oml = e.asInstanceOf[ObjBody.MemberList]
@@ -2101,7 +2110,6 @@ class NewEvaluator(
21012110
case ExprTags.SelectSuper => visitSelectSuper(e.asInstanceOf[SelectSuper])
21022111
case ExprTags.LookupSuper => visitLookupSuper(e.asInstanceOf[LookupSuper])
21032112
case ExprTags.InSuper => visitInSuper(e.asInstanceOf[InSuper])
2104-
case ExprTags.ObjExtend => visitObjExtend(e.asInstanceOf[ObjExtend])
21052113
case ExprTags.`ObjBody.ObjComp` => visitObjComp(e.asInstanceOf[ObjBody.ObjComp], null)
21062114
case ExprTags.Slice => visitSlice(e.asInstanceOf[Slice])
21072115
case ExprTags.Import => visitImport(e.asInstanceOf[Import])
@@ -2111,9 +2119,6 @@ class NewEvaluator(
21112119
case ExprTags.Error => visitError(e.asInstanceOf[Expr.Error])
21122120
case _ => visitInvalid(e)
21132121
}
2114-
} catch {
2115-
Error.withStackFrame(e)
2116-
}
21172122
// This is only needed for --no-static-errors, otherwise these expression types do not make it past the optimizer
21182123
override def visitInvalid(e: Expr): Nothing = (e.tag: @switch) match {
21192124
case ExprTags.Id =>

0 commit comments

Comments
 (0)