diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java index a5228edc33c83..01aac856b353c 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java @@ -25,6 +25,8 @@ import java.util.ArrayList; import java.util.List; import java.util.Locale; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import javax.crypto.Cipher; import javax.crypto.spec.GCMParameterSpec; import javax.crypto.spec.IvParameterSpec; @@ -342,4 +344,19 @@ public static UTF8String quote(UTF8String str) { String sp = str.toString().replaceAll(qtChar, qtCharRep); return UTF8String.fromString(qtChar + sp + qtChar); } + + /** + * Compiles {@code regex} with the given {@code flags} for the regexp expression family and + * returns the resulting {@link Pattern}. A {@link PatternSyntaxException} is translated into + * the user-facing INVALID_PARAMETER_VALUE.PATTERN error, which receives {@code funcName}, the + * rejected pattern and the caught exception. Shared by the regexp eval and codegen paths so + * the generated Java is one call instead of an inline try/catch around {@code Pattern.compile}. 
+ */ + public static Pattern compileRegexPattern(String regex, int flags, String funcName) { + try { + return Pattern.compile(regex, flags); + } catch (PatternSyntaxException e) { + throw QueryExecutionErrors.invalidPatternError(funcName, e.getPattern(), e); + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 5ad360a54e8d5..c2c01d2c78159 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -1253,17 +1253,15 @@ object RegExpUtils { val termLastRegex = ctx.addMutableState("UTF8String", "lastRegex") val termPattern = ctx.addMutableState(classNamePattern, "pattern") val collationRegexFlags = CollationSupport.collationAwareRegexFlags(collationId) + val utils = classOf[ExpressionImplUtils].getName s""" |if (!$regexp.equals($termLastRegex)) { | // regex value changed - | try { - | UTF8String r = $regexp.clone(); - | $termPattern = $classNamePattern.compile(r.toString(), $collationRegexFlags); - | $termLastRegex = r; - | } catch (java.util.regex.PatternSyntaxException e) { - | throw QueryExecutionErrors.invalidPatternError("$prettyName", e.getPattern(), e); - | } + | UTF8String r = $regexp.clone(); + | $termPattern = + | $utils.compileRegexPattern(r.toString(), $collationRegexFlags, "$prettyName"); + | $termLastRegex = r; |} |java.util.regex.Matcher $matcher = $termPattern.matcher($subject.toString()); |""".stripMargin @@ -1272,12 +1270,8 @@ object RegExpUtils { def getPatternAndLastRegex(p: Any, prettyName: String, collationId: Int): (Pattern, UTF8String) = { val r = p.asInstanceOf[UTF8String].clone() - val pattern = try { - Pattern.compile(r.toString, CollationSupport.collationAwareRegexFlags(collationId)) - } catch { - case e: PatternSyntaxException => 
- throw QueryExecutionErrors.invalidPatternError(prettyName, e.getPattern, e) - } + val pattern = ExpressionImplUtils.compileRegexPattern( + r.toString, CollationSupport.collationAwareRegexFlags(collationId), prettyName) (pattern, r) } }