Skip to content

Commit f5bf391

Browse files
authored
Optimize lstripChars, rstripChars, and stripChars via specialization (#236)
This PR optimizes `lstripChars` / `rstripChars` / `stripChars` built-in functions by using the specialization framework from #119 / 0bd255a to pre-compile and re-use `Pattern` instances when the replacement / strip arguments are constants. In Java, `String.replaceAll()` compiles and uses a `Pattern` under the hood and this is relatively expensive; specialization lets us save this cost when the `chars`-to-be-stripped are constant. ## Testing **Correctness**: ran all tests with a manual change to disable static function application optimizations (a prerequisite required to achieve full test coverage, as the static application prevents specialization from kicking in during most tests). In a followup, I think we should explore adding flags for optimizer features and running all tests with/without optimization. **Performance**: ran benchmarks on a large file and with this change I save ~11% of allocated bytes and ~8% of wall time. I ran that same file through `RunProfiler` and saw a large speedup in `stripChars`, costing ~619ns/call before and ~154ns/call after.
1 parent 15c9e0d commit f5bf391

1 file changed

Lines changed: 79 additions & 10 deletions

File tree

sjsonnet/src/sjsonnet/Std.scala

Lines changed: 79 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,82 @@ class Std {
456456
}
457457
}
458458

459+
private object StripUtils {
460+
private def getLeadingPattern(chars: String): Pattern =
461+
Pattern.compile("^[" + Regex.quote(chars) + "]+")
462+
463+
private def getTrailingPattern(chars: String): Pattern =
464+
Pattern.compile("[" + Regex.quote(chars) + "]+$")
465+
466+
def unspecializedStrip(str: String, chars: String, left: Boolean, right: Boolean): String = {
467+
var s = str
468+
if (right) s = getTrailingPattern(chars).matcher(s).replaceAll("")
469+
if (left) s = getLeadingPattern(chars).matcher(s).replaceAll("")
470+
s
471+
}
472+
473+
private class SpecStrip(
474+
chars: String,
475+
left: Boolean,
476+
right: Boolean,
477+
functionName: String
478+
) extends Val.Builtin1(functionName, "str") {
479+
private[this] val leftPattern = getLeadingPattern(chars)
480+
private[this] val rightPattern = getTrailingPattern(chars)
481+
482+
def evalRhs(str: Val, ev: EvalScope, pos: Position): Val = {
483+
var s = str.asString
484+
if (right) s = rightPattern.matcher(s).replaceAll("")
485+
if (left) s = leftPattern.matcher(s).replaceAll("")
486+
Val.Str(pos, s)
487+
}
488+
}
489+
490+
def trySpecialize(str: Expr, chars: Val.Str, left: Boolean, right: Boolean, name: String): (Val.Builtin, Array[Expr]) = {
491+
try {
492+
(new SpecStrip(chars.value, left, right, name), Array(str))
493+
} catch {
494+
case _: Exception => null
495+
}
496+
}
497+
}
498+
499+
object StripChars extends Val.Builtin2("stripChars", "str", "chars") {
500+
def evalRhs(str: Val, chars: Val, ev: EvalScope, pos: Position): Val = {
501+
Val.Str(pos, StripUtils.unspecializedStrip(str.asString, chars.asString, left = true, right = true))
502+
}
503+
504+
override def specialize(args: Array[Expr]): (Val.Builtin, Array[Expr]) = args match {
505+
case Array(str, chars: Val.Str) =>
506+
StripUtils.trySpecialize(str, chars, left = true, right = true, functionName)
507+
case _ => null
508+
}
509+
}
510+
511+
object LStripChars extends Val.Builtin2("lstripChars", "str", "chars") {
512+
def evalRhs(str: Val, chars: Val, ev: EvalScope, pos: Position): Val = {
513+
Val.Str(pos, StripUtils.unspecializedStrip(str.asString, chars.asString, left = true, right = false))
514+
}
515+
516+
override def specialize(args: Array[Expr]): (Val.Builtin, Array[Expr]) = args match {
517+
case Array(str, chars: Val.Str) =>
518+
StripUtils.trySpecialize(str, chars, left = true, right = false, functionName)
519+
case _ => null
520+
}
521+
}
522+
523+
object RStripChars extends Val.Builtin2("rstripChars", "str", "chars") {
524+
def evalRhs(str: Val, chars: Val, ev: EvalScope, pos: Position): Val = {
525+
Val.Str(pos, StripUtils.unspecializedStrip(str.asString, chars.asString, left = false, right = true))
526+
}
527+
528+
override def specialize(args: Array[Expr]): (Val.Builtin, Array[Expr]) = args match {
529+
case Array(str, chars: Val.Str) =>
530+
StripUtils.trySpecialize(str, chars, left = false, right = true, functionName)
531+
case _ => null
532+
}
533+
}
534+
459535
private object Join extends Val.Builtin2("join", "sep", "arr") {
460536
def evalRhs(sep: Val, _arr: Val, ev: EvalScope, pos: Position): Val = {
461537
val arr = implicitly[ReadWriter[Val.Arr]].apply(_arr)
@@ -1025,16 +1101,9 @@ class Std {
10251101
builtin(Char_),
10261102
builtin(StrReplace),
10271103
builtin(StrReplaceAll),
1028-
1029-
builtin("rstripChars", "str", "chars"){ (pos, ev, str: String, chars: String) =>
1030-
str.replaceAll("[" + Regex.quote(chars) + "]+$", "")
1031-
},
1032-
builtin("lstripChars", "str", "chars"){ (pos, ev, str: String, chars: String) =>
1033-
str.replaceAll("^[" + Regex.quote(chars) + "]+", "")
1034-
},
1035-
builtin("stripChars", "str", "chars"){ (pos, ev, str: String, chars: String) =>
1036-
str.replaceAll("[" + Regex.quote(chars) + "]+$", "").replaceAll("^[" + Regex.quote(chars) + "]+", "")
1037-
},
1104+
builtin(RStripChars),
1105+
builtin(LStripChars),
1106+
builtin(StripChars),
10381107
builtin(Join),
10391108
builtin(Member),
10401109

0 commit comments

Comments
 (0)