diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java index 0413278d0cb86..65d479bcea4a3 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java @@ -17,11 +17,13 @@ package org.apache.spark.sql.catalyst.expressions; +import java.text.ParseException; import java.time.DateTimeException; import java.time.LocalDate; import org.apache.spark.sql.catalyst.util.DateTimeUtils; import org.apache.spark.sql.catalyst.util.IntervalUtils; +import org.apache.spark.sql.catalyst.util.TimestampFormatter; import org.apache.spark.sql.errors.QueryExecutionErrors; import org.apache.spark.sql.types.Decimal; import org.apache.spark.unsafe.types.CalendarInterval; @@ -68,4 +70,32 @@ public static CalendarInterval makeIntervalExact( throw QueryExecutionErrors.arithmeticOverflowError(e.getMessage(), "", null); } } + + /** + * Parses {@code input} with {@code formatter} on behalf of {@code ToTimestamp} + * expressions (e.g. {@code to_timestamp}) in ANSI mode ({@code failOnError = true}). + * When {@code forTimestampNTZ} is true the result of {@code parseWithoutTimeZone} + * is returned as is and {@code downScaleFactor} is ignored; otherwise the result + * of {@code parse} is divided by {@code downScaleFactor} (integer division). + * A {@link DateTimeException} (which also covers {@code DateTimeParseException}) + * or a {@link ParseException} is rethrown as {@code ansiDateTimeParseError} with + * {@code suggestedFuncOnFail} as the suggested fall-back function; any other + * exception (e.g. {@code IllegalStateException}) propagates unchanged. 
+ */ + public static long parseToTimestampExact( + TimestampFormatter formatter, + String input, + long downScaleFactor, + boolean forTimestampNTZ, + String suggestedFuncOnFail) { + try { + if (forTimestampNTZ) { + return formatter.parseWithoutTimeZone(input); + } else { + return formatter.parse(input) / downScaleFactor; + } + } catch (DateTimeException | ParseException e) { + throw QueryExecutionErrors.ansiDateTimeParseError(e, suggestedFuncOnFail); + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index a724f02cd107e..57b90f204994a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1324,18 +1324,20 @@ abstract class ToTimestamp null } else { val formatter = formatterOption.getOrElse(getFormatter(fmt.toString)) - try { - if (forTimestampNTZ) { - formatter.parseWithoutTimeZone(t.asInstanceOf[UTF8String].toString) - } else { - formatter.parse(t.asInstanceOf[UTF8String].toString) / downScaleFactor + val str = t.asInstanceOf[UTF8String].toString + if (failOnError) { + DateTimeExpressionUtils.parseToTimestampExact( + formatter, str, downScaleFactor, forTimestampNTZ, suggestedFuncOnFail) + } else { + try { + if (forTimestampNTZ) { + formatter.parseWithoutTimeZone(str) + } else { + formatter.parse(str) / downScaleFactor + } + } catch { + case e if isParseError(e) => null } - } catch { - case e: DateTimeException if failOnError => - throw QueryExecutionErrors.ansiDateTimeParseError(e, suggestedFuncOnFail) - case e: ParseException if failOnError => - throw QueryExecutionErrors.ansiDateTimeParseError(e, suggestedFuncOnFail) - case e if isParseError(e) => null } } } @@ -1344,11 +1346,7 @@ abstract class ToTimestamp override def doGenCode(ctx: 
CodegenContext, ev: ExprCode): ExprCode = { val javaType = CodeGenerator.javaType(dataType) - val parseErrorBranch: String = if (failOnError) { - s"throw QueryExecutionErrors.ansiDateTimeParseError(e, \"${suggestedFuncOnFail}\");" - } else { - s"${ev.isNull} = true;" - } + val utils = classOf[DateTimeExpressionUtils].getName val parseMethod = if (forTimestampNTZ) { "parseWithoutTimeZone" } else { @@ -1359,21 +1357,35 @@ abstract class ToTimestamp } else { s"/ $downScaleFactor" } + // Emits the Java snippet that parses a string into a timestamp. `formatterExpr` + // and `inputExpr` are Java source fragments spliced into the generated code. + // In ANSI mode (failOnError) the parse and the parse-error -> ANSI error + // translation are delegated to DateTimeExpressionUtils.parseToTimestampExact; + // otherwise an inline try/catch turns a parse failure into a null result. + def parseTimestampCode(formatterExpr: String, inputExpr: String): String = { + if (failOnError) { + s"""${ev.value} = $utils.parseToTimestampExact(""" + + s"""$formatterExpr, $inputExpr, ${downScaleFactor}L, $forTimestampNTZ, """ + + s""""$suggestedFuncOnFail");""" + } else { + s""" + |try { + | ${ev.value} = $formatterExpr.$parseMethod($inputExpr) $downScaleCode; + |} catch (java.time.DateTimeException e) { + | ${ev.isNull} = true; + |} catch (java.text.ParseException e) { + | ${ev.isNull} = true; + |} + |""".stripMargin + } + } left.dataType match { case _: StringType => formatterOption.map { fmt => val df = classOf[TimestampFormatter].getName val formatterName = ctx.addReferenceObj("formatter", fmt, df) nullSafeCodeGen(ctx, ev, (datetimeStr, _) => - s""" - |try { - | ${ev.value} = $formatterName.$parseMethod($datetimeStr.toString()) $downScaleCode; - |} catch (java.time.DateTimeException e) { - | ${parseErrorBranch} - |} catch (java.text.ParseException e) { - | ${parseErrorBranch} - |} - |""".stripMargin) + parseTimestampCode(formatterName, s"$datetimeStr.toString()")) }.getOrElse { val zid = 
ctx.addReferenceObj("zoneId", zoneId, 
classOf[ZoneId].getName) val tf = TimestampFormatter.getClass.getName.stripSuffix("$") @@ -1386,13 +1398,7 @@ abstract class ToTimestamp | $zid, | $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT(), | true); - |try { - | ${ev.value} = $timestampFormatter.$parseMethod($string.toString()) $downScaleCode; - |} catch (java.time.DateTimeException e) { - | ${parseErrorBranch} - |} catch (java.text.ParseException e) { - | ${parseErrorBranch} - |} + |${parseTimestampCode(timestampFormatter, s"$string.toString()")} |""".stripMargin) } case TimestampType | TimestampNTZType =>