diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
index a5228edc33c83..7bad7c430b862 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
@@ -25,6 +25,7 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;
+import java.util.zip.CRC32;
 import javax.crypto.Cipher;
 import javax.crypto.spec.GCMParameterSpec;
 import javax.crypto.spec.IvParameterSpec;
@@ -342,4 +343,15 @@ public static UTF8String quote(UTF8String str) {
     String sp = str.toString().replaceAll(qtChar, qtCharRep);
     return UTF8String.fromString(qtChar + sp + qtChar);
   }
+
+  /**
+   * Returns the CRC32 checksum of {@code bytes}, which must be non-null (its length is read),
+   * for the {@code crc32} expression. Shared by the eval and codegen paths so the per-stage
+   * generated Java is a single call rather than an inline allocate / update / getValue sequence.
+   */
+  public static long crc32(byte[] bytes) {
+    CRC32 checksum = new CRC32();
+    checksum.update(bytes, 0, bytes.length);
+    return checksum.getValue();
+  }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
index 0f27dee9dbc84..0b0c84176691e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.expressions
 
 import java.math.{BigDecimal, RoundingMode}
 import java.util.concurrent.TimeUnit._
-import java.util.zip.CRC32
 
 import scala.annotation.tailrec
 
@@ -214,20 +213,13 @@ case class Crc32(child: Expression)
   override def contextIndependentFoldable: Boolean = child.contextIndependentFoldable
 
   protected override def nullSafeEval(input: Any): Any = {
-    val checksum = new CRC32
-    checksum.update(input.asInstanceOf[Array[Byte]], 0, input.asInstanceOf[Array[Byte]].length)
-    checksum.getValue
+    ExpressionImplUtils.crc32(input.asInstanceOf[Array[Byte]])
   }
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    val CRC32 = "java.util.zip.CRC32"
-    val checksum = ctx.freshName("checksum")
+    val utils = classOf[ExpressionImplUtils].getName
     nullSafeCodeGen(ctx, ev, value => {
-      s"""
-        $CRC32 $checksum = new $CRC32();
-        $checksum.update($value, 0, $value.length);
-        ${ev.value} = $checksum.getValue();
-      """
+      s"${ev.value} = $utils.crc32($value);"
     })
   }
 