Skip to content

Commit bf0e393

Browse files
committed
refactor: deduplicate render utilities and add overflow guards
Motivation: Combined review of PR databricks#776 + databricks#778 identified ~130 lines of duplicated SWAR string rendering and long-to-char conversion code, plus two missing overflow checks in StringModule. Modification: - Extract renderQuotedStringSWAR as protected method in BaseCharRenderer, delegate from MaterializeJsonRenderer (removes ~60 lines duplication) - Make escapeCharInline protected, remove duplicate in Renderer - Consolidate Renderer.visitFloat64 onto inherited writeLongDirect, remove standalone RenderUtils.appendLong (~40 lines) - Add totalLen > Int.MaxValue guard in Join pre-sized allocation - Add Long overflow detection in parseDigits - Leverage _asciiSafe flag in Substr/Join to skip redundant scans Result: Net -132 lines. All tests pass across JVM/JS/Native/WASM.
1 parent e38e8c4 commit bf0e393

4 files changed

Lines changed: 64 additions & 196 deletions

File tree

sjsonnet/src/sjsonnet/BaseCharRenderer.scala

Lines changed: 49 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -259,53 +259,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output](
259259
flushBuffer()
260260
s match {
261261
case str: String if !escapeUnicode =>
262-
val len = str.length
263-
if (len == 0) {
264-
elemBuilder.ensureLength(2)
265-
elemBuilder.appendUnsafe('"')
266-
elemBuilder.appendUnsafe('"')
267-
} else {
268-
// Convert to char[] for SWAR scanning + bulk copy
269-
val chars = new Array[Char](len)
270-
str.getChars(0, len, chars, 0)
271-
val firstEscape = CharSWAR.findFirstEscapeCharChar(chars, 0, len)
272-
if (firstEscape < 0) {
273-
// Clean string — direct bulk copy
274-
elemBuilder.ensureLength(len + 2)
275-
elemBuilder.appendUnsafe('"')
276-
val cbArr = elemBuilder.arr
277-
val pos = elemBuilder.getLength
278-
System.arraycopy(chars, 0, cbArr, pos, len)
279-
elemBuilder.length = pos + len
280-
elemBuilder.appendUnsafe('"')
281-
} else {
282-
// Dirty string — chunked rendering: bulk copy clean segments, escape inline
283-
elemBuilder.ensureLength(len + len + 2)
284-
elemBuilder.appendUnsafe('"')
285-
var from = 0
286-
var escPos = firstEscape
287-
while (escPos >= 0) {
288-
if (escPos > from) {
289-
val chunkLen = escPos - from
290-
val cbArr = elemBuilder.arr
291-
val pos = elemBuilder.getLength
292-
System.arraycopy(chars, from, cbArr, pos, chunkLen)
293-
elemBuilder.length = pos + chunkLen
294-
}
295-
escapeCharInline(chars(escPos))
296-
from = escPos + 1
297-
escPos = if (from < len) CharSWAR.findFirstEscapeCharChar(chars, from, len) else -1
298-
}
299-
if (from < len) {
300-
val tailLen = len - from
301-
val cbArr = elemBuilder.arr
302-
val pos = elemBuilder.getLength
303-
System.arraycopy(chars, from, cbArr, pos, tailLen)
304-
elemBuilder.length = pos + tailLen
305-
}
306-
elemBuilder.appendUnsafe('"')
307-
}
308-
}
262+
renderQuotedStringSWAR(str)
309263
case _ =>
310264
upickle.core.RenderUtils.escapeChar(
311265
null,
@@ -319,8 +273,55 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output](
319273
out
320274
}
321275

276+
protected def renderQuotedStringSWAR(str: String): Unit = {
277+
val len = str.length
278+
if (len == 0) {
279+
elemBuilder.ensureLength(2)
280+
elemBuilder.appendUnsafe('"')
281+
elemBuilder.appendUnsafe('"')
282+
return
283+
}
284+
val chars = new Array[Char](len)
285+
str.getChars(0, len, chars, 0)
286+
val firstEscape = CharSWAR.findFirstEscapeCharChar(chars, 0, len)
287+
if (firstEscape < 0) {
288+
elemBuilder.ensureLength(len + 2)
289+
elemBuilder.appendUnsafe('"')
290+
val cbArr = elemBuilder.arr
291+
val pos = elemBuilder.getLength
292+
System.arraycopy(chars, 0, cbArr, pos, len)
293+
elemBuilder.length = pos + len
294+
elemBuilder.appendUnsafe('"')
295+
} else {
296+
elemBuilder.ensureLength(len + len + 2)
297+
elemBuilder.appendUnsafe('"')
298+
var from = 0
299+
var escPos = firstEscape
300+
while (escPos >= 0) {
301+
if (escPos > from) {
302+
val chunkLen = escPos - from
303+
val cbArr = elemBuilder.arr
304+
val pos = elemBuilder.getLength
305+
System.arraycopy(chars, from, cbArr, pos, chunkLen)
306+
elemBuilder.length = pos + chunkLen
307+
}
308+
escapeCharInline(chars(escPos))
309+
from = escPos + 1
310+
escPos = if (from < len) CharSWAR.findFirstEscapeCharChar(chars, from, len) else -1
311+
}
312+
if (from < len) {
313+
val tailLen = len - from
314+
val cbArr = elemBuilder.arr
315+
val pos = elemBuilder.getLength
316+
System.arraycopy(chars, from, cbArr, pos, tailLen)
317+
elemBuilder.length = pos + tailLen
318+
}
319+
elemBuilder.appendUnsafe('"')
320+
}
321+
}
322+
322323
/** Inline JSON escape for a single char. */
323-
private def escapeCharInline(c: Char): Unit = {
324+
protected def escapeCharInline(c: Char): Unit = {
324325
elemBuilder.ensureLength(6)
325326
(c: @scala.annotation.switch) match {
326327
case '"' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('"')

sjsonnet/src/sjsonnet/Renderer.scala

Lines changed: 2 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@ class Renderer(out: Writer = new java.io.StringWriter(), indent: Int = -1)
1717
flushBuffer()
1818
val i = d.toLong
1919
if (d == i) {
20-
// Fast path: render integers directly to char buffer, avoiding String allocation.
21-
// Most numbers in Jsonnet output are integers (array indices, counters, etc.).
22-
RenderUtils.appendLong(elemBuilder, i)
20+
writeLongDirect(i)
2321
} else if (d % 1 == 0) {
2422
appendString(
2523
BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString()
@@ -352,81 +350,7 @@ final case class MaterializeJsonRenderer(
352350
}
353351

354352
/** Render a quoted string into elemBuilder (char-based) with chunked SWAR scanning. */
355-
private def renderQuotedString(str: String): Unit = {
356-
val len = str.length
357-
if (len == 0) {
358-
elemBuilder.ensureLength(2)
359-
elemBuilder.appendUnsafe('"')
360-
elemBuilder.appendUnsafe('"')
361-
return
362-
}
363-
// Convert to char[] for SWAR scanning + bulk copy
364-
val chars = new Array[Char](len)
365-
str.getChars(0, len, chars, 0)
366-
val firstEscape = CharSWAR.findFirstEscapeCharChar(chars, 0, len)
367-
if (firstEscape < 0) {
368-
// Clean string — direct bulk copy
369-
elemBuilder.ensureLength(len + 2)
370-
elemBuilder.appendUnsafe('"')
371-
val cbArr = elemBuilder.arr
372-
val pos = elemBuilder.getLength
373-
System.arraycopy(chars, 0, cbArr, pos, len)
374-
elemBuilder.length = pos + len
375-
elemBuilder.appendUnsafe('"')
376-
} else {
377-
// Dirty string — chunked rendering: bulk copy clean segments, escape inline
378-
elemBuilder.ensureLength(len + len + 2)
379-
elemBuilder.appendUnsafe('"')
380-
var from = 0
381-
var escPos = firstEscape
382-
while (escPos >= 0) {
383-
// Copy clean chunk before escape char
384-
if (escPos > from) {
385-
val chunkLen = escPos - from
386-
val cbArr = elemBuilder.arr
387-
val pos = elemBuilder.getLength
388-
System.arraycopy(chars, from, cbArr, pos, chunkLen)
389-
elemBuilder.length = pos + chunkLen
390-
}
391-
// Escape the char inline
392-
escapeCharInline(chars(escPos))
393-
from = escPos + 1
394-
// Find next escape char using SWAR
395-
escPos = if (from < len) CharSWAR.findFirstEscapeCharChar(chars, from, len) else -1
396-
}
397-
// Copy remaining clean tail
398-
if (from < len) {
399-
val tailLen = len - from
400-
val cbArr = elemBuilder.arr
401-
val pos = elemBuilder.getLength
402-
System.arraycopy(chars, from, cbArr, pos, tailLen)
403-
elemBuilder.length = pos + tailLen
404-
}
405-
elemBuilder.appendUnsafe('"')
406-
}
407-
}
408-
409-
/** Inline JSON escape for a single char. */
410-
private def escapeCharInline(c: Char): Unit = {
411-
elemBuilder.ensureLength(6)
412-
(c: @scala.annotation.switch) match {
413-
case '"' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('"')
414-
case '\\' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('\\')
415-
case '\b' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('b')
416-
case '\f' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('f')
417-
case '\n' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('n')
418-
case '\r' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('r')
419-
case '\t' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('t')
420-
case _ =>
421-
// Control chars → \\u00XX
422-
elemBuilder.appendUnsafe('\\')
423-
elemBuilder.appendUnsafe('u')
424-
elemBuilder.appendUnsafe('0')
425-
elemBuilder.appendUnsafe('0')
426-
elemBuilder.appendUnsafe(BaseByteRenderer.HEX_CHARS((c >> 4) & 0xf))
427-
elemBuilder.appendUnsafe(BaseByteRenderer.HEX_CHARS(c & 0xf))
428-
}
429-
}
353+
private def renderQuotedString(str: String): Unit = renderQuotedStringSWAR(str)
430354

431355
/** Render a double value directly into the char buffer. */
432356
@inline private def renderDouble(d: Double): Unit = {
@@ -653,54 +577,4 @@ object RenderUtils {
653577
BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString()
654578
} else d.toString
655579
}
656-
657-
/** Maximum number of digits in a Long value (Long.MinValue = -9223372036854775808, 20 chars). */
658-
private final val MaxLongChars = 20
659-
660-
/**
661-
* Render a long value directly into a [[upickle.core.CharBuilder]], avoiding the intermediate
662-
* `String` allocation that `Long.toString` would create. For small absolute values (the common
663-
* case in Jsonnet output — array lengths, indices, counters), this saves one allocation per
664-
* number. The algorithm writes digits in reverse then reverses in-place.
665-
*/
666-
def appendLong(cb: upickle.core.CharBuilder, value: Long): Unit = {
667-
if (value == 0) {
668-
cb.append('0')
669-
return
670-
}
671-
672-
cb.ensureLength(MaxLongChars)
673-
val arr = cb.arr
674-
var pos = cb.getLength
675-
676-
val negative = value < 0
677-
// Use negative accumulator to handle Long.MinValue correctly
678-
var n = if (negative) value else -value
679-
val startPos = pos
680-
681-
while (n != 0) {
682-
val digit = -(n % 10).toInt
683-
arr(pos) = ('0' + digit).toChar
684-
pos += 1
685-
n /= 10
686-
}
687-
688-
if (negative) {
689-
arr(pos) = '-'
690-
pos += 1
691-
}
692-
693-
// Reverse the digits in-place
694-
var lo = startPos
695-
var hi = pos - 1
696-
while (lo < hi) {
697-
val tmp = arr(lo)
698-
arr(lo) = arr(hi)
699-
arr(hi) = tmp
700-
lo += 1
701-
hi -= 1
702-
}
703-
704-
cb.length = pos
705-
}
706580
}

sjsonnet/src/sjsonnet/stdlib/StringModule.scala

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,7 @@ object StringModule extends AbstractFunctionModule {
6262
case _ => Error.fail("Expected a number for len in substr, got " + len.value.prettyName)
6363
}
6464

65-
// Fast path: ASCII-only strings have 1:1 char-to-codepoint mapping,
66-
// avoiding expensive codePointCount/offsetByCodePoints scans.
67-
if (CharSWAR.isAllAscii(str)) {
65+
if (srcAsciiSafe || CharSWAR.isAllAscii(str)) {
6866
val strLen = str.length
6967
val safeOffset = math.min(offset, strLen)
7068
val safeLength = math.min(length, strLen - safeOffset)
@@ -288,7 +286,9 @@ object StringModule extends AbstractFunctionModule {
288286
val sepLen = s.length
289287
var totalLen = 0L
290288
var count = 0
291-
var allAsciiSafe = CharSWAR.isAllAscii(s) && !CharSWAR.hasEscapeChar(s)
289+
val sepVal = sep.value.asInstanceOf[Val.Str]
290+
var allAsciiSafe =
291+
sepVal._asciiSafe || (CharSWAR.isAllAscii(s) && !CharSWAR.hasEscapeChar(s))
292292
var i = 0
293293
while (i < arr.length) {
294294
arr.value(i) match {
@@ -303,6 +303,8 @@ object StringModule extends AbstractFunctionModule {
303303
i += 1
304304
}
305305
if (count == 0) return Val.Str(pos, "")
306+
if (totalLen > Int.MaxValue)
307+
Error.fail("Join result too large: " + totalLen + " characters")
306308
// Pass 2: build result in pre-sized char array.
307309
val chars = new Array[Char](totalLen.toInt)
308310
var cPos = 0
@@ -440,7 +442,10 @@ object StringModule extends AbstractFunctionModule {
440442
else -1
441443
if (d < 0 || d >= base)
442444
Error.fail("Cannot parse '" + s + "' as an integer in base " + base)
443-
result = result * base + d
445+
val next = result * base + d
446+
if (next < result)
447+
Error.fail("Integer overflow parsing '" + s + "' in base " + base)
448+
result = next
444449
i += 1
445450
}
446451
if (isNeg) -result else result

sjsonnet/test/src/sjsonnet/RendererTests.scala

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -43,26 +43,14 @@ object RendererTests extends TestSuite {
4343
|]""".stripMargin
4444
}
4545

46-
test("appendLong") {
47-
def render(v: Long): String = {
48-
val cb = new upickle.core.CharBuilder
49-
RenderUtils.appendLong(cb, v)
50-
cb.makeString()
51-
}
52-
test("zero") { render(0L) ==> "0" }
53-
test("positive") { render(42L) ==> "42" }
54-
test("negative") { render(-1L) ==> "-1" }
55-
test("large") { render(9999999999L) ==> "9999999999" }
56-
test("maxValue") { render(Long.MaxValue) ==> Long.MaxValue.toString }
57-
test("minValue") { render(Long.MinValue) ==> Long.MinValue.toString }
58-
}
59-
6046
test("visitFloat64Integers") {
61-
// Verify that integer-valued doubles render correctly via the Renderer
6247
ujson.transform(ujson.Num(0), new Renderer()).toString ==> "0"
6348
ujson.transform(ujson.Num(42), new Renderer()).toString ==> "42"
6449
ujson.transform(ujson.Num(-1), new Renderer()).toString ==> "-1"
6550
ujson.transform(ujson.Num(1e15), new Renderer()).toString ==> "1000000000000000"
51+
ujson.transform(ujson.Num(9999999999.0), new Renderer()).toString ==> "9999999999"
52+
ujson.transform(ujson.Num(Long.MaxValue.toDouble), new Renderer()).toString ==>
53+
Long.MaxValue.toDouble.toLong.toString
6654
}
6755

6856
test("indentZero") {

0 commit comments

Comments
 (0)