From 7f2bc90bc55e8ed7bfc752e6e104c46d4e69b0b6 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Sat, 30 May 2026 11:14:41 +0000 Subject: [PATCH] [SPARK-57174][SQL] Simplify Chr codegen by extracting a static Java helper ### What changes were proposed in this pull request? Add `ExpressionImplUtils.chr(long longVal)` and route `Chr`'s eval and codegen paths through it. `Chr.doGenCode` previously emitted a ~7-line inline if/else chain (negative -> empty string, `(v & 0xFF) == 0` -> NUL, otherwise the Latin-1 character); it now emits a single `ExpressionImplUtils.chr(...)` call, and the eval path calls the same helper. This is a plain (non-ANSI, non-try/catch) type-independent block, in line with the broadened goal of SPARK-56908 to move fixed generated-Java logic into static Java helpers. ### Why are the changes needed? Part of SPARK-56908 (umbrella). Collapsing the inline if/else chain to one call shrinks the generated Java for every stage that uses `chr`. ### Does this PR introduce _any_ user-facing change? No. The compiled behavior is identical; only the emitted Java source text changes. ### How was this patch tested? ``` build/sbt "catalyst/testOnly *StringExpressionsSuite" ``` 59/59 pass, including `string for ascii` which covers `Chr` over negative, zero, wrap-around (256), high-bit (149) and null inputs (exercised both with and without whole-stage codegen). ### Was this patch authored or co-authored using generative AI tooling? Generated-by: Claude Code (Opus 4.8) Co-authored-by: Isaac --- .../expressions/ExpressionImplUtils.java | 17 ++++++++++++++ .../expressions/stringExpressions.scala | 23 +++---------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java index a5228edc33c83..ded5c96525e22 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java @@ -342,4 +342,21 @@ public static UTF8String quote(UTF8String str) { String sp = str.toString().replaceAll(qtChar, qtCharRep); return UTF8String.fromString(qtChar + sp + qtChar); } + + /** + * Returns the single-character string for the {@code chr} expression: the + * ASCII/Latin-1 character for {@code longVal & 0xFF}. A negative argument + * yields the empty string. Shared by the eval and codegen paths so the + * generated Java is a single call rather than an inline if/else chain. + */ + public static UTF8String chr(long longVal) { + if (longVal < 0) { + return UTF8String.EMPTY_UTF8; + } else if ((longVal & 0xFF) == 0) { + return UTF8String.fromString(String.valueOf(Character.MIN_VALUE)); + } else { + char c = (char) (longVal & 0xFF); + return UTF8String.fromString(String.valueOf(c)); + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 5b5a63812dcab..5c6e457421bf6 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -2822,31 +2822,14 @@ case class Chr(child: Expression) override def inputTypes: Seq[DataType] = Seq(LongType) protected override def nullSafeEval(lon: Any): Any = { - val longVal = lon.asInstanceOf[Long] - if (longVal < 0) { - UTF8String.EMPTY_UTF8 - } else if ((longVal & 0xFF) == 0) { - UTF8String.fromString(Character.MIN_VALUE.toString) - } else { - UTF8String.fromString((longVal & 0xFF).toChar.toString) - } + ExpressionImplUtils.chr(lon.asInstanceOf[Long]) } override def contextIndependentFoldable: Boolean = child.contextIndependentFoldable override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, lon => { - s""" - if ($lon < 0) { - ${ev.value} = UTF8String.EMPTY_UTF8; - } else if (($lon & 0xFF) == 0) { - ${ev.value} = UTF8String.fromString(String.valueOf(Character.MIN_VALUE)); - } else { - char c = (char)($lon & 0xFF); - ${ev.value} = UTF8String.fromString(String.valueOf(c)); - } - """ - }) + val utils = classOf[ExpressionImplUtils].getName + nullSafeCodeGen(ctx, ev, lon => s"${ev.value} = $utils.chr($lon);") } override protected def withNewChildInternal(newChild: Expression): Chr = copy(child = newChild)