Skip to content

Commit dece904

Browse files
committed
perf: SIMD base64 via aklomp/base64 + ByteArr/RangeArr/asciiSafe
Motivation: PR #749 added SIMD base64 and runtime optimizations (ByteArr, RangeArr, asciiSafe) but was reverted by #777 due to incorrect hand-written x86 SIMD C code. This PR restores all optimizations while replacing the buggy SIMD code with the battle-tested aklomp/base64 library. Modification: - Replace hand-written C SIMD with aklomp/base64 (BSD-2-Clause) which provides correct SIMD dispatch (SSSE3/AVX2/AVX512/NEON64) via runtime CPU detection - Add PlatformBase64 abstraction: JVM/JS use java.util.Base64 with strict RFC 4648 padding validation, Native uses aklomp/base64 FFI - Switch to strict mode aligned with go-jsonnet: reject unpadded base64 input (e.g. "YQ" without "=="). java.util.Base64 is lenient, so JVM/JS add explicit length check for ASCII input, matching go-jsonnet's len(str) % 4 != 0 check (builtins.go:1467) - Restore Val.ByteArr: compact byte-backed array for base64DecodeBytes - Restore Val.RangeArr subclass from flag-based _isRange - Restore Val.Str._asciiSafe + renderAsciiSafeString - Restore Materializer/ByteRenderer fast paths for ByteArr - Add comprehensive test suite (56+ Scala tests + 4 Jsonnet golden tests) Result: Beats jrsonnet on DecodeBytes benchmarks (1.47x faster). Overall 15-38% faster than master on base64 workloads.
1 parent 4123ac3 commit dece904

28 files changed

Lines changed: 1801 additions & 102 deletions

.github/workflows/pr-build.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ jobs:
5151
name: Sjsonnet ${{ matrix.lang }} build
5252
steps:
5353
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
54+
with:
55+
submodules: ${{ matrix.lang == 'native' }}
5456
- uses: ./.github/actions/setup-build
5557
with:
5658
node: ${{ matrix.lang == 'js' || matrix.lang == 'wasm' }}

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "vendor/base64"]
2+
path = vendor/base64
3+
url = https://github.com/aklomp/base64.git
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
local largeStr = std.repeat("Lorem ipsum dolor sit amet, consectetur adipiscing elit. ", 100),
3+
local encoded = std.base64(largeStr),
4+
local decoded = std.base64Decode(encoded),
5+
local encodedArr = std.base64(std.makeArray(1000, function(i) i % 256)),
6+
local decodedBytes = std.base64DecodeBytes(encodedArr),
7+
8+
local encoded2 = std.base64(decoded),
9+
local decoded2 = std.base64Decode(encoded2),
10+
local encodedArr2 = std.base64(std.makeArray(2000, function(i) (i * 7 + 13) % 256)),
11+
local decodedBytes2 = std.base64DecodeBytes(encodedArr2),
12+
13+
local encoded3 = std.base64(decoded2),
14+
local decoded3 = std.base64Decode(encoded3),
15+
local encodedArr3 = std.base64(std.makeArray(3000, function(i) (i * 13 + 37) % 256)),
16+
local decodedBytes3 = std.base64DecodeBytes(encodedArr3),
17+
18+
roundtrip_ok: decoded3 == largeStr,
19+
byte_roundtrip_ok: std.length(decodedBytes3) == 3000,
20+
encoded_len: std.length(encoded3),
21+
decoded_len: std.length(decoded3)
22+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"byte_roundtrip_ok": true,
3+
"decoded_len": 5700,
4+
"encoded_len": 7600,
5+
"roundtrip_ok": true
6+
}

build.mill

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,41 @@ object sjsonnet extends VersionFileModule {
278278
def nativeLTO = LTO.Full
279279
def nativeMultithreading = None
280280

281+
// Build aklomp/base64 as static library for SIMD-accelerated base64
282+
def buildBase64Lib = Task {
283+
val base64Dir = BuildCtx.workspaceRoot / "vendor" / "base64"
284+
if (!os.exists(base64Dir / "CMakeLists.txt")) {
285+
throw new Exception(
286+
"vendor/base64 not found. Run: git submodule update --init vendor/base64"
287+
)
288+
}
289+
val buildDir = Task.ctx().dest / "base64-build"
290+
os.makeDir.all(buildDir)
291+
os.proc(
292+
"cmake",
293+
base64Dir.toString,
294+
"-DCMAKE_POSITION_INDEPENDENT_CODE=ON",
295+
"-DBASE64_WITH_OpenMP=OFF",
296+
"-DBASE64_BUILD_TESTS=OFF",
297+
"-DBASE64_BUILD_CLI=OFF",
298+
"-DCMAKE_BUILD_TYPE=Release"
299+
).call(cwd = buildDir)
300+
os.proc("cmake", "--build", buildDir.toString, "--config", "Release").call()
301+
PathRef(buildDir)
302+
}
303+
304+
def nativeLinkingOptions = Task {
305+
super.nativeLinkingOptions() ++ Seq(
306+
(buildBase64Lib().path / "libbase64.a").toString
307+
)
308+
}
309+
310+
def nativeCompileOptions = Task {
311+
super.nativeCompileOptions() ++ Seq(
312+
s"-I${BuildCtx.workspaceRoot / "vendor" / "base64" / "include"}"
313+
)
314+
}
315+
281316
object test extends ScalaNativeTests with CrossTests {
282317
def releaseMode = ReleaseMode.Debug
283318
def nativeMultithreading = None
@@ -286,6 +321,16 @@ object sjsonnet extends VersionFileModule {
286321
"SCALANATIVE_THREAD_STACK_SIZE" -> stackSize
287322
)
288323
def nativeLTO = LTO.None
324+
def nativeLinkingOptions = Task {
325+
super.nativeLinkingOptions() ++ Seq(
326+
(SjsonnetNativeModule.this.buildBase64Lib().path / "libbase64.a").toString
327+
)
328+
}
329+
def nativeCompileOptions = Task {
330+
super.nativeCompileOptions() ++ Seq(
331+
s"-I${BuildCtx.workspaceRoot / "vendor" / "base64" / "include"}"
332+
)
333+
}
289334
}
290335
}
291336

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package sjsonnet.stdlib
2+
3+
/**
4+
* Scala.js implementation of base64 encode/decode. Delegates to java.util.Base64 (provided by
5+
* Scala.js stdlib emulation).
6+
*/
7+
object PlatformBase64 {
8+
9+
def encodeToString(input: Array[Byte]): String =
10+
java.util.Base64.getEncoder.encodeToString(input)
11+
12+
def decode(input: String): Array[Byte] = {
13+
// Strict padding check: reject ASCII input whose length is not a multiple of 4.
14+
//
15+
// java.util.Base64 is lenient and accepts unpadded input (e.g. "YQ" instead of "YQ=="),
16+
// but the Jsonnet spec requires strict RFC 4648 compliance. Both go-jsonnet and C++ jsonnet
17+
// reject unpadded input:
18+
// - go-jsonnet: checks `len(str) % 4 != 0` before calling base64.StdEncoding.DecodeString
19+
// (see builtins.go:1467)
20+
// - C++ jsonnet: rejects with "Not a base64 encoded string"
21+
//
22+
// We only apply this check when all characters are ASCII. Non-ASCII characters (e.g. "ĀQ=")
23+
// are never valid base64 and should be caught by java.util.Base64 with a more specific
24+
// "Illegal base64 character" error message. This matches go-jsonnet's behavior where len()
25+
// counts UTF-8 bytes (so "ĀQ=" is 4 bytes, passes the length check, and fails on the
26+
// invalid character instead).
27+
val len = input.length
28+
if (len > 0 && len % 4 != 0) {
29+
var allAscii = true
30+
var i = 0
31+
while (i < len && allAscii) {
32+
if (input.charAt(i) > 127) allAscii = false
33+
i += 1
34+
}
35+
if (allAscii) {
36+
throw new IllegalArgumentException(
37+
"Last unit does not have enough valid bits"
38+
)
39+
}
40+
}
41+
java.util.Base64.getDecoder.decode(input)
42+
}
43+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package sjsonnet.stdlib
2+
3+
/**
4+
* JVM implementation of base64 encode/decode. Delegates to java.util.Base64 which has HotSpot
5+
* intrinsics for high performance.
6+
*/
7+
object PlatformBase64 {
8+
9+
def encodeToString(input: Array[Byte]): String =
10+
java.util.Base64.getEncoder.encodeToString(input)
11+
12+
def decode(input: String): Array[Byte] = {
13+
// Strict padding check: reject ASCII input whose length is not a multiple of 4.
14+
//
15+
// java.util.Base64 is lenient and accepts unpadded input (e.g. "YQ" instead of "YQ=="),
16+
// but the Jsonnet spec requires strict RFC 4648 compliance. Both go-jsonnet and C++ jsonnet
17+
// reject unpadded input:
18+
// - go-jsonnet: checks `len(str) % 4 != 0` before calling base64.StdEncoding.DecodeString
19+
// (see builtins.go:1467)
20+
// - C++ jsonnet: rejects with "Not a base64 encoded string"
21+
//
22+
// We only apply this check when all characters are ASCII. Non-ASCII characters (e.g. "ĀQ=")
23+
// are never valid base64 and should be caught by java.util.Base64 with a more specific
24+
// "Illegal base64 character" error message. This matches go-jsonnet's behavior where len()
25+
// counts UTF-8 bytes (so "ĀQ=" is 4 bytes, passes the length check, and fails on the
26+
// invalid character instead).
27+
val len = input.length
28+
if (len > 0 && len % 4 != 0) {
29+
var allAscii = true
30+
var i = 0
31+
while (i < len && allAscii) {
32+
if (input.charAt(i) > 127) allAscii = false
33+
i += 1
34+
}
35+
if (allAscii) {
36+
throw new IllegalArgumentException(
37+
"Last unit does not have enough valid bits"
38+
)
39+
}
40+
}
41+
java.util.Base64.getDecoder.decode(input)
42+
}
43+
}
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
package sjsonnet.stdlib
2+
3+
import scala.scalanative.unsafe._
4+
import scala.scalanative.unsigned._
5+
6+
/**
7+
* Scala Native implementation of base64 encode/decode.
8+
*
9+
* Uses the aklomp/base64 C library (BSD-2-Clause) which provides SIMD-accelerated base64 via
10+
* runtime CPU detection:
11+
* - x86_64: SSSE3 / SSE4.1 / SSE4.2 / AVX / AVX2 / AVX-512
12+
* - AArch64: NEON
13+
* - Fallback: optimized generic C implementation
14+
*
15+
* The static library is built by CMake and linked via nativeLinkingOptions.
16+
*
17+
* Both aklomp/base64 and C++ jsonnet (the reference implementation) use strict RFC 4648 mode:
18+
* padding is required, unpadded input is rejected. This differs from java.util.Base64 on JVM which
19+
* is more lenient (accepts unpadded input) — that JVM leniency is a pre-existing sjsonnet bug, not
20+
* something we replicate here.
21+
*/
22+
@extern
23+
private[stdlib] object libbase64 {
24+
def base64_encode(
25+
src: Ptr[CChar],
26+
srclen: CSize,
27+
out: Ptr[CChar],
28+
outlen: Ptr[CSize],
29+
flags: CInt
30+
): Unit = extern
31+
32+
def base64_decode(
33+
src: Ptr[CChar],
34+
srclen: CSize,
35+
out: Ptr[CChar],
36+
outlen: Ptr[CSize],
37+
flags: CInt
38+
): CInt = extern
39+
}
40+
41+
object PlatformBase64 {
42+
43+
// Base64 alphabet lookup: A-Z(0-25), a-z(26-51), 0-9(52-61), +(62), /(63)
44+
// -1 = invalid character
45+
private val DECODE_TABLE: Array[Int] = {
46+
val t = Array.fill[Int](256)(-1)
47+
var i = 0
48+
while (i < 26) { t('A' + i) = i; i += 1 }
49+
i = 0
50+
while (i < 26) { t('a' + i) = i + 26; i += 1 }
51+
i = 0
52+
while (i < 10) { t('0' + i) = i + 52; i += 1 }
53+
t('+') = 62
54+
t('/') = 63
55+
t
56+
}
57+
58+
/**
59+
* Diagnose why base64 decode failed and throw a JVM-compatible error message. Only called on the
60+
* error path (after aklomp/base64 returns failure), so zero overhead on the hot path.
61+
*
62+
* Error messages match java.util.Base64.Decoder behavior for golden test compatibility:
63+
* - Invalid character: "Illegal base64 character XX" (hex)
64+
* - Wrong length/padding: "Last unit does not have enough valid bits"
65+
*/
66+
private def throwDecodeError(srcBytes: Array[Byte]): Nothing = {
67+
val len = srcBytes.length
68+
69+
// Scan for invalid characters (first error wins)
70+
var i = 0
71+
while (i < len) {
72+
val b = srcBytes(i) & 0xff
73+
if (b != '='.toInt) {
74+
if (b >= DECODE_TABLE.length || DECODE_TABLE(b) < 0) {
75+
throw new IllegalArgumentException(
76+
"Illegal base64 character " + Integer.toHexString(b)
77+
)
78+
}
79+
}
80+
i += 1
81+
}
82+
83+
// No invalid characters found — must be a length/padding issue
84+
throw new IllegalArgumentException(
85+
"Last unit does not have enough valid bits"
86+
)
87+
}
88+
89+
def encodeToString(input: Array[Byte]): String = {
90+
if (input.length == 0) return ""
91+
val maxOutLen = ((input.length + 2) / 3) * 4
92+
Zone.acquire { implicit z =>
93+
val srcPtr = alloc[Byte](input.length.toUSize)
94+
var i = 0
95+
while (i < input.length) {
96+
!(srcPtr + i) = input(i)
97+
i += 1
98+
}
99+
val outPtr = alloc[Byte]((maxOutLen + 1).toUSize)
100+
val outLenPtr = alloc[CSize](1.toUSize)
101+
libbase64.base64_encode(srcPtr, input.length.toUSize, outPtr, outLenPtr, 0)
102+
val actualLen = (!outLenPtr).toInt
103+
val result = new Array[Byte](actualLen)
104+
i = 0
105+
while (i < actualLen) {
106+
result(i) = !(outPtr + i)
107+
i += 1
108+
}
109+
new String(result, "US-ASCII")
110+
}
111+
}
112+
113+
def decode(input: String): Array[Byte] = {
114+
if (input.isEmpty) return Array.emptyByteArray
115+
val srcBytes = input.getBytes("US-ASCII")
116+
val maxOutLen = ((srcBytes.length / 4) * 3) + 3
117+
Zone.acquire { implicit z =>
118+
val srcPtr = alloc[Byte](srcBytes.length.toUSize)
119+
var i = 0
120+
while (i < srcBytes.length) {
121+
!(srcPtr + i) = srcBytes(i)
122+
i += 1
123+
}
124+
val outPtr = alloc[Byte]((maxOutLen + 1).toUSize)
125+
val outLenPtr = alloc[CSize](1.toUSize)
126+
val ret =
127+
libbase64.base64_decode(srcPtr, srcBytes.length.toUSize, outPtr, outLenPtr, 0)
128+
if (ret == 0) {
129+
throwDecodeError(srcBytes)
130+
}
131+
val actualLen = (!outLenPtr).toInt
132+
val result = new Array[Byte](actualLen)
133+
i = 0
134+
while (i < actualLen) {
135+
result(i) = !(outPtr + i)
136+
i += 1
137+
}
138+
result
139+
}
140+
}
141+
}

sjsonnet/src/sjsonnet/BaseByteRenderer.scala

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,27 @@ class BaseByteRenderer[T <: java.io.OutputStream](
242242
else visitLongString(str)
243243
}
244244

245+
/**
246+
* Fast path for strings known to be ASCII-safe (no escaping needed, all chars 0x20-0x7E). Skips
247+
* SWAR scanning and UTF-8 encoding — writes bytes directly from chars.
248+
*/
249+
private[sjsonnet] def renderAsciiSafeString(str: String): Unit = {
250+
val len = str.length
251+
elemBuilder.ensureLength(len + 2)
252+
val arr = elemBuilder.arr
253+
var pos = elemBuilder.length
254+
arr(pos) = '"'.toByte
255+
pos += 1
256+
var i = 0
257+
while (i < len) {
258+
arr(pos) = str.charAt(i).toByte
259+
pos += 1
260+
i += 1
261+
}
262+
arr(pos) = '"'.toByte
263+
elemBuilder.length = pos + 1
264+
}
265+
245266
/**
246267
* Zero-allocation fast path for short ASCII strings (the vast majority of JSON keys/values). Uses
247268
* getChars to bulk-copy into a reusable char buffer, then scans the buffer directly (avoiding

0 commit comments

Comments
 (0)