Skip to content

Commit f78b8fa

Browse files
STDREGEX
1 parent 89c04a0 commit f78b8fa

12 files changed

Lines changed: 167 additions & 35 deletions

File tree

build.sbt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ lazy val main = (project in file("sjsonnet"))
2121
"org.scala-lang.modules" %% "scala-collection-compat" % "2.11.0",
2222
"org.tukaani" % "xz" % "1.8",
2323
"org.yaml" % "snakeyaml" % "1.33",
24+
"com.google.re2j" % "re2j" % "1.7",
2425
),
2526
libraryDependencies ++= Seq(
2627
"com.lihaoyi" %% "utest" % "0.8.2",

build.sc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ object sjsonnet extends Module {
108108
ivy"org.json:json:20240303",
109109
ivy"org.tukaani:xz::1.10",
110110
ivy"org.lz4:lz4-java::1.8.0",
111-
ivy"org.yaml:snakeyaml::1.33"
111+
ivy"org.yaml:snakeyaml::1.33",
112+
ivy"com.google.re2j:re2j:1.7",
112113
)
113114
def scalacOptions = Seq("-opt:l:inline", "-opt-inline-from:sjsonnet.**")
114115

sjsonnet/src-js/sjsonnet/Platform.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
package sjsonnet
2+
23
import java.io.File
4+
import java.util
5+
import java.util.regex.Pattern
6+
7+
38
object Platform {
49
def gzipBytes(s: Array[Byte]): String = {
510
throw new Exception("GZip not implemented in Scala.js")
@@ -34,4 +39,11 @@ object Platform {
3439
def hashFile(file: File): String = {
3540
throw new Exception("hashFile not implemented in Scala.js")
3641
}
42+
43+
private val regexCache = new util.LinkedHashMap[String, Pattern](100, 0.75f, true) {
44+
override def removeEldestEntry(eldest: util.Map.Entry[String, Pattern]): Boolean = size() > 100
45+
}
46+
def getPatternFromCache(pat: String) : Pattern = regexCache.computeIfAbsent(pat, _ => Pattern.compile(pat))
47+
48+
def regexQuote(s: String): String = Pattern.quote(s)
3749
}

sjsonnet/src-jvm/sjsonnet/Platform.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
package sjsonnet
22

33
import java.io.{BufferedInputStream, ByteArrayOutputStream, File, FileInputStream}
4+
import java.util
45
import java.util.Base64
56
import java.util.zip.GZIPOutputStream
7+
import com.google.re2j.Pattern
68
import net.jpountz.xxhash.{StreamingXXHash64, XXHashFactory}
79
import org.json.{JSONArray, JSONObject}
810
import org.tukaani.xz.LZMA2Options
911
import org.tukaani.xz.XZOutputStream
1012
import org.yaml.snakeyaml.{LoaderOptions, Yaml}
1113
import org.yaml.snakeyaml.constructor.SafeConstructor
14+
1215
import scala.jdk.CollectionConverters._
1316

1417
object Platform {
@@ -107,4 +110,11 @@ object Platform {
107110

108111
hash.getValue.toString
109112
}
113+
114+
private val regexCache = new util.LinkedHashMap[String, Pattern](100, 0.75f, true) {
115+
override def removeEldestEntry(eldest: util.Map.Entry[String, Pattern]): Boolean = size() > 100
116+
}
117+
def getPatternFromCache(pat: String) : Pattern = regexCache.computeIfAbsent(pat, _ => Pattern.compile(pat))
118+
119+
def regexQuote(s: String): String = Pattern.quote(s)
110120
}

sjsonnet/src-native/sjsonnet/Platform.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
package sjsonnet
22

33
import java.io.{ByteArrayOutputStream, File}
4+
import java.util
45
import java.util.Base64
56
import java.util.zip.GZIPOutputStream
7+
import java.util.regex.Pattern
68

79
object Platform {
810
def gzipBytes(b: Array[Byte]): String = {
@@ -50,4 +52,11 @@ object Platform {
5052
// File hashes in Scala Native are just the file content
5153
scala.io.Source.fromFile(file).mkString
5254
}
55+
56+
private val regexCache = new util.LinkedHashMap[String, Pattern](100, 0.75f, true) {
57+
override def removeEldestEntry(eldest: util.Map.Entry[String, Pattern]): Boolean = size() > 100
58+
}
59+
def getPatternFromCache(pat: String) : Pattern = regexCache.computeIfAbsent(pat, _ => Pattern.compile(pat))
60+
61+
def regexQuote(s: String): String = Pattern.quote(s)
5362
}

sjsonnet/src/sjsonnet/PrettyYamlRenderer.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
package sjsonnet
22

33
import java.io.{StringWriter, Writer}
4-
import java.util.regex.Pattern
54

65
import upickle.core.{ArrVisitor, ObjVisitor}
7-
import fastparse.IndexedParserInput
86

97
import scala.collection.mutable
108
/**
@@ -240,7 +238,7 @@ object PrettyYamlRenderer{
240238
*/
241239
def writeBlockString(str: String, out: Writer, depth: Int, indent: Int, lineComment: String) = {
242240
val len = str.length()
243-
val splits = YamlRenderer.newlinePattern.split(str, -1)
241+
val splits = Platform.getPatternFromCache("\n").split(str, -1)
244242
val blockOffsetNumeral = if (str.charAt(0) != ' ') "" else indent
245243
val (blockStyle, dropRight) =
246244
(str.charAt(len - 1), if (len > 2) Some(str.charAt(len - 2)) else None) match{

sjsonnet/src/sjsonnet/Std.scala

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@ import java.io.StringWriter
44
import java.nio.charset.StandardCharsets.UTF_8
55
import java.util.Base64
66
import java.util
7-
import java.util.regex.Pattern
87
import sjsonnet.Expr.Member.Visibility
98

109
import scala.collection.Searching._
1110
import scala.collection.mutable
12-
import scala.util.matching.Regex
1311

1412
/**
1513
* The Jsonnet standard library, `std`, with each builtin function implemented
@@ -19,8 +17,8 @@ import scala.util.matching.Regex
1917
class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.empty) {
2018
private val dummyPos: Position = new Position(null, 0)
2119
private val emptyLazyArray = new Array[Lazy](0)
22-
private val leadingWhiteSpacePattern = Pattern.compile("^[ \t\n\f\r\u0085\u00A0']+")
23-
private val trailingWhiteSpacePattern = Pattern.compile("[ \t\n\f\r\u0085\u00A0']+$")
20+
private val leadingWhiteSpacePattern = Platform.getPatternFromCache("^[ \t\n\f\r\u0085\u00A0']+")
21+
private val trailingWhiteSpacePattern = Platform.getPatternFromCache("[ \t\n\f\r\u0085\u00A0']+$")
2422
private val oldNativeFunctions = Map(
2523
builtin("gzip", "v"){ (_, _, v: Val) =>
2624
v match{
@@ -48,7 +46,7 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
4846
},
4947
)
5048
require(oldNativeFunctions.forall(k => !additionalNativeFunctions.contains(k._1)), "Conflicting native functions")
51-
private val nativeFunctions = oldNativeFunctions ++ additionalNativeFunctions
49+
private val nativeFunctions = oldNativeFunctions ++ additionalNativeFunctions ++ StdRegex.functions
5250

5351
private object AssertEqual extends Val.Builtin2("assertEqual", "a", "b") {
5452
def evalRhs(v1: Val, v2: Val, ev: EvalScope, pos: Position): Val = {
@@ -474,26 +472,24 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
474472
Val.Str(pos, str.asString.replaceAll(from.asString, to.asString))
475473
override def specialize(args: Array[Expr]) = args match {
476474
case Array(str, from: Val.Str, to) =>
477-
try { (new SpecFrom(Pattern.compile(from.value)), Array(str, to)) } catch { case _: Exception => null }
475+
try { (new SpecFrom(from.value), Array(str, to)) } catch { case _: Exception => null }
478476
case _ => null
479477
}
480-
private class SpecFrom(from: Pattern) extends Val.Builtin2("strReplaceAll", "str", "to") {
478+
private class SpecFrom(from: String) extends Val.Builtin2("strReplaceAll", "str", "to") {
481479
def evalRhs(str: Val, to: Val, ev: EvalScope, pos: Position): Val =
482-
Val.Str(pos, from.matcher(str.asString).replaceAll(to.asString))
480+
Val.Str(pos, Platform.getPatternFromCache(from).matcher(str.asString).replaceAll(to.asString))
483481
}
484482
}
485483

486484
private object StripUtils {
487-
private def getLeadingPattern(chars: String): Pattern =
488-
Pattern.compile("^[" + Regex.quote(chars) + "]+")
485+
private def getLeadingPattern(chars: String): String = "^[" + Platform.regexQuote(chars) + "]+"
489486

490-
private def getTrailingPattern(chars: String): Pattern =
491-
Pattern.compile("[" + Regex.quote(chars) + "]+$")
487+
private def getTrailingPattern(chars: String): String = "[" + Platform.regexQuote(chars) + "]+$"
492488

493489
def unspecializedStrip(str: String, chars: String, left: Boolean, right: Boolean): String = {
494490
var s = str
495-
if (right) s = getTrailingPattern(chars).matcher(s).replaceAll("")
496-
if (left) s = getLeadingPattern(chars).matcher(s).replaceAll("")
491+
if (right) s = Platform.getPatternFromCache(getTrailingPattern(chars)).matcher(s).replaceAll("")
492+
if (left) s = Platform.getPatternFromCache(getLeadingPattern(chars)).matcher(s).replaceAll("")
497493
s
498494
}
499495

@@ -508,8 +504,8 @@ class Std(private val additionalNativeFunctions: Map[String, Val.Builtin] = Map.
508504

509505
def evalRhs(str: Val, ev: EvalScope, pos: Position): Val = {
510506
var s = str.asString
511-
if (right) s = rightPattern.matcher(s).replaceAll("")
512-
if (left) s = leftPattern.matcher(s).replaceAll("")
507+
if (right) s = Platform.getPatternFromCache(rightPattern).matcher(s).replaceAll("")
508+
if (left) s = Platform.getPatternFromCache(leftPattern).matcher(s).replaceAll("")
513509
Val.Str(pos, s)
514510
}
515511
}
sjsonnet/src/sjsonnet/StdRegex.scala

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package sjsonnet
2+
3+
import sjsonnet.Expr.Member.Visibility
4+
import sjsonnet.Val.Obj
5+
6+
object StdRegex {
7+
def functions: Map[String, Val.Builtin] = Map(
8+
"regexPartialMatch" -> new Val.Builtin2("regexPartialMatch", "pattern", "str") {
9+
override def evalRhs(pattern: Val, str: Val, ev: EvalScope, pos: Position): Val = {
10+
val compiledPattern = Platform.getPatternFromCache(pattern.asString)
11+
val matcher = compiledPattern.matcher(str.asString)
12+
var returnStr: Val = Val.Null(pos.noOffset)
13+
val captures = Array.newBuilder[Val.Str]
14+
val groupCount = matcher.groupCount()
15+
while (matcher.find()) {
16+
if (returnStr.isInstanceOf[Val.Null]) {
17+
returnStr = Val.Str(pos.noOffset, matcher.group(0))
18+
}
19+
for (i <- 1 to groupCount) {
20+
captures += Val.Str(pos.noOffset, matcher.group(i))
21+
}
22+
}
23+
val result = captures.result()
24+
Val.Obj.mk(pos.noOffset,
25+
"string" -> new Obj.ConstMember(true, Visibility.Normal, returnStr),
26+
"captures" -> new Obj.ConstMember(true, Visibility.Normal, new Val.Arr(pos.noOffset, result))
27+
)
28+
}
29+
},
30+
"regexFullMatch" -> new Val.Builtin2("regexFullMatch", "pattern", "str") {
31+
override def evalRhs(pattern: Val, str: Val, ev: EvalScope, pos: Position): Val = {
32+
val compiledPattern = Platform.getPatternFromCache(pattern.asString)
33+
val matcher = compiledPattern.matcher(str.asString)
34+
if (!matcher.matches()) {
35+
Val.Obj.mk(pos.noOffset,
36+
"string" -> new Obj.ConstMember(true, Visibility.Normal, Val.Null(pos.noOffset)),
37+
"captures" -> new Obj.ConstMember(true, Visibility.Normal, new Val.Arr(pos.noOffset, Array.empty[Lazy]))
38+
)
39+
} else {
40+
val captures = Array.newBuilder[Val.Str]
41+
val groupCount = matcher.groupCount()
42+
for (i <- 0 to groupCount) {
43+
captures += Val.Str(pos.noOffset, matcher.group(i))
44+
}
45+
val result = captures.result()
46+
Val.Obj.mk(pos.noOffset,
47+
"string" -> new Obj.ConstMember(true, Visibility.Normal, result.head),
48+
"captures" -> new Obj.ConstMember(true, Visibility.Normal, new Val.Arr(pos.noOffset, result.drop(1)))
49+
)
50+
}
51+
}
52+
},
53+
"regexGlobalReplace" -> new Val.Builtin3("regexGlobalReplace", "str", "pattern", "to") {
54+
override def evalRhs(str: Val, pattern: Val, to: Val, ev: EvalScope, pos: Position): Val = {
55+
val compiledPattern = Platform.getPatternFromCache(pattern.asString)
56+
val matcher = compiledPattern.matcher(str.asString)
57+
Val.Str(pos.noOffset, matcher.replaceAll(to.asString))
58+
}
59+
},
60+
"regexReplace" -> new Val.Builtin3("regexGlobalReplace", "str", "pattern", "to") {
61+
override def evalRhs(str: Val, pattern: Val, to: Val, ev: EvalScope, pos: Position): Val = {
62+
val compiledPattern = Platform.getPatternFromCache(pattern.asString)
63+
val matcher = compiledPattern.matcher(str.asString)
64+
Val.Str(pos.noOffset, matcher.replaceFirst(to.asString))
65+
}
66+
},
67+
"regexQuoteMeta" -> new Val.Builtin1("regexQuoteMeta", "str") {
68+
override def evalRhs(str: Val, ev: EvalScope, pos: Position): Val = {
69+
Val.Str(pos.noOffset, Platform.regexQuote(str.asString))
70+
}
71+
}
72+
)
73+
}

sjsonnet/src/sjsonnet/TomlRenderer.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package sjsonnet
33
import upickle.core.{ArrVisitor, CharBuilder, ObjVisitor, SimpleVisitor, Visitor}
44

55
import java.io.StringWriter
6-
import java.util.regex.Pattern
76

87

98
class TomlRenderer(out: StringWriter = new java.io.StringWriter(), cumulatedIndent: String, indent: String) extends SimpleVisitor[StringWriter, StringWriter]{
@@ -117,7 +116,7 @@ class TomlRenderer(out: StringWriter = new java.io.StringWriter(), cumulatedInde
117116
}
118117

119118
object TomlRenderer {
120-
private val bareAllowed = Pattern.compile("[A-Za-z0-9_-]+")
119+
private val bareAllowed = Platform.getPatternFromCache("[A-Za-z0-9_-]+")
121120
def escapeKey(key: String): String = if (bareAllowed.matcher(key).matches()) key else {
122121
val out = new StringWriter()
123122
BaseRenderer.escape(out, key, unicode = true)

sjsonnet/src/sjsonnet/YamlRenderer.scala

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
11
package sjsonnet
22

33
import java.io.StringWriter
4-
import java.util.regex.Pattern
54
import upickle.core.{ArrVisitor, ObjVisitor, SimpleVisitor, Visitor}
65

7-
import scala.util.Try
8-
9-
106

117
class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayInObject: Boolean = false,
128
quoteKeys: Boolean = true, indent: Int = 2) extends BaseCharRenderer(_out, indent){
@@ -52,7 +48,7 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
5248
elemBuilder.append('"')
5349
elemBuilder.append('"')
5450
} else if (s.charAt(len - 1) == '\n') {
55-
val splits = YamlRenderer.newlinePattern.split(s)
51+
val splits = Platform.getPatternFromCache("\n").split(s.toString)
5652
elemBuilder.append('|')
5753
depth += 1
5854
splits.foreach { split =>
@@ -174,15 +170,14 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
174170
}
175171
}
176172
object YamlRenderer{
177-
val newlinePattern: Pattern = Pattern.compile("\n")
178-
private val safeYamlKeyPattern = Pattern.compile("^[a-zA-Z0-9/._-]+$")
173+
private val safeYamlKeyPattern = Platform.getPatternFromCache("^[a-zA-Z0-9/._-]+$")
179174
private val yamlReserved = Set("true", "false", "null", "yes", "no", "on", "off", "y", "n", ".nan",
180175
"+.inf", "-.inf", ".inf", "null", "-", "---", "''")
181-
private val yamlTimestampPattern = Pattern.compile("^(?:[0-9]*-){2}[0-9]*$")
182-
private val yamlBinaryPattern = Pattern.compile("^[-+]?0b[0-1_]+$")
183-
private val yamlHexPattern = Pattern.compile("[-+]?0x[0-9a-fA-F_]+")
184-
private val yamlFloatPattern = Pattern.compile( "^-?([0-9_]*)*(\\.[0-9_]*)?(e[-+][0-9_]+)?$" )
185-
private val yamlIntPattern = Pattern.compile("^[-+]?[0-9_]+$")
176+
private val yamlTimestampPattern = Platform.getPatternFromCache("^(?:[0-9]*-){2}[0-9]*$")
177+
private val yamlBinaryPattern = Platform.getPatternFromCache("^[-+]?0b[0-1_]+$")
178+
private val yamlHexPattern = Platform.getPatternFromCache("[-+]?0x[0-9a-fA-F_]+")
179+
private val yamlFloatPattern = Platform.getPatternFromCache( "^-?([0-9_]*)*(\\.[0-9_]*)?(e[-+][0-9_]+)?$" )
180+
private val yamlIntPattern = Platform.getPatternFromCache("^[-+]?[0-9_]+$")
186181

187182
private def isSafeBareKey(k: String) = {
188183
val l = k.toLowerCase

0 commit comments

Comments
 (0)