From a7f4feb484e80c74a6c92ad01c0f661642825b1b Mon Sep 17 00:00:00 2001
From: Max Gekk <max.gekk@gmail.com>
Date: Fri, 29 May 2026 15:39:57 +0200
Subject: [PATCH 1/4] [SPARK-57032][SQL] Extend timestamp string parsing for
 nanosecond fractional precision

### What changes were proposed in this pull request?

Extend `SparkDateTimeUtils.parseTimestampString` to preserve fractional-second
digits 7-9 in a new output-only slot `segments(9)` (sub-microsecond remainder in
[0, 999]), while keeping `segments(6)` as microseconds so all existing callers are
unaffected. Add internal parse entry points (plain `def`s on the `SparkDateTimeUtils`
trait, declared without an access modifier) that return a normalized
`TimestampNanosVal` for `TIMESTAMP_NTZ(p)`/`TIMESTAMP_LTZ(p)` with `p` in [7, 9]:
`stringToTimestampNTZNanos`, `stringToTimestampLTZNanos`, and their ANSI variants.
Fractional digits beyond the target precision `p` are truncated toward zero,
consistent with the existing microsecond parsing behavior.

### Why are the changes needed?

This is the first sub-task of the nanosecond datetime conversion utilities under
SPARK-56822 (SPIP: Timestamps with nanosecond precision). Without it, timestamp
strings with 7-9 fractional digits cannot be converted to the nanosecond-capable
composite representation (epochMicros + nanosWithinMicro).

### Does this PR introduce any user-facing change?

No. Existing `TimestampType`/`TimestampNTZType` string parsing is unchanged; the
new parse APIs are internal utility methods and are not yet wired to user-facing
casts.

### How was this patch tested?

Added `TimestampNanosParseSuite` covering 7/8/9-digit fractions, per-precision
truncation, NTZ/LTZ, zone suffixes, range edge cases, and ANSI errors. Verified
existing `DateTimeUtilsSuite` and `TimestampFormatterSuite` still pass.
---
 .../catalyst/util/SparkDateTimeUtils.scala    | 138 +++++++++++++-
 .../util/TimestampNanosParseSuite.scala       | 173 ++++++++++++++++++
 2 files changed, 305 insertions(+), 6 deletions(-)
 create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
index 9684737a22865..4f04e827ab522 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
@@ -30,8 +30,8 @@ import org.apache.spark.QueryContext
 import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.RebaseDateTime.{rebaseGregorianToJulianDays, rebaseGregorianToJulianMicros, rebaseJulianToGregorianDays, rebaseJulianToGregorianMicros}
 import org.apache.spark.sql.errors.ExecutionErrors
-import org.apache.spark.sql.types.{DateType, TimestampType, TimeType}
-import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.sql.types.{DateType, TimestampLTZNanosType, TimestampNTZNanosType, TimestampType, TimeType}
+import org.apache.spark.unsafe.types.{TimestampNanosVal, UTF8String}
 import org.apache.spark.util.SparkClassUtils
 
 trait SparkDateTimeUtils {
@@ -491,6 +491,11 @@ trait SparkDateTimeUtils {
    *     - +|-hhmmss
    *   - Region-based zone IDs in the form `area/city`, such as `Europe/Paris`
    *
+   * Up to 9 fractional-second digits are accepted. Digits 1-6 are kept as microseconds in
+   * `segments(6)` (backward-compatible micro behavior), digits 7-9 are kept as the
+   * sub-microsecond remainder in `segments(9)` (a value in [0, 999]), and digits beyond the 9th
+   * are dropped.
+   *
    * @return
    *   timestamp segments, time zone id and whether the input is just time without a date. If the
    *   input string can't be parsed as timestamp, the result timestamp segments are empty.
@@ -509,7 +514,11 @@ trait SparkDateTimeUtils {
       return (Array.empty, None, false)
     }
     var tz: Option[String] = None
-    val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0)
+    // Indices 0-6 hold year, month, day, hour, minute, second and the microsecond part of the
+    // fractional second (digits 1-6). Index 9 is an output-only slot that holds the
+    // sub-microsecond remainder (fractional digits 7-9) as a value in [0, 999]; it is not touched
+    // by the parsing loop below. Indices 7-8 are used while validating a region-based zone id.
+    val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
     var i = 0
     var currentSegmentValue = 0
     var currentSegmentDigits = 0
@@ -522,6 +531,7 @@ trait SparkDateTimeUtils {
     }
 
     var digitsMilli = 0
+    var nanosWithinMicro = 0
     var justTime = false
     var yearSign: Option[Int] = None
     if (bytes(j) == '-' || bytes(j) == '+') {
@@ -604,7 +614,9 @@ trait SparkDateTimeUtils {
             i += 1
           }
         } else {
-          if (i < segments.length && (b == ':' || b == ' ')) {
+          // Bound is fixed at 9 (the original number of parsed segments) so that the trailing
+          // output-only slot segments(9) is never written by the parsing loop.
+          if (i < 9 && (b == ':' || b == ' ')) {
             if (!isValidDigits(i, currentSegmentDigits)) {
               return (Array.empty, None, false)
             }
@@ -620,10 +632,13 @@ trait SparkDateTimeUtils {
         if (i == 6) {
           digitsMilli += 1
         }
-        // We will truncate the nanosecond part if there are more than 6 digits, which results
-        // in loss of precision
         if (i != 6 || currentSegmentDigits < 6) {
+          // Fractional digits 1-6 form the microsecond part stored in segments(6).
           currentSegmentValue = currentSegmentValue * 10 + parsedValue
+        } else if (currentSegmentDigits < 9) {
+          // Fractional digits 7-9 are retained as the sub-microsecond remainder. Digits beyond
+          // the 9th are dropped (loss of precision below the nanosecond grid).
+          nanosWithinMicro = nanosWithinMicro * 10 + parsedValue
         }
         currentSegmentDigits += 1
       }
@@ -640,6 +655,17 @@ trait SparkDateTimeUtils {
       digitsMilli += 1
     }
 
+    // Right-pad the captured sub-microsecond digits (the 7th to 9th fractional digits) so that
+    // segments(9) always holds a value in [0, 999]. The number of captured digits is
+    // clamp(digitsMilli - 6, 0, 3); fewer captured digits means the remainder is left-aligned and
+    // must be scaled up (e.g. ".0000001" -> 100, ".00000012" -> 120, ".000000123" -> 123).
+    var subMicroDigits = math.max(0, math.min(digitsMilli, 9) - 6)
+    while (subMicroDigits < 3) {
+      nanosWithinMicro *= 10
+      subMicroDigits += 1
+    }
+    segments(9) = nanosWithinMicro
+
     // This step also validates time zone part
     val zoneId = tz.map(zoneName => getZoneId(zoneName.trim))
     segments(0) *= yearSign.getOrElse(1)
@@ -713,6 +739,106 @@ trait SparkDateTimeUtils {
     }
   }
 
+  /**
+   * Truncates the sub-microsecond remainder (`segments(9)`, a value in [0, 999]) to the given
+   * fractional-second `precision`. Since microseconds occupy fractional digits 1-6, a `precision`
+   * in [7, 9] only affects the sub-microsecond digits: digits beyond `precision` are dropped
+   * (truncation toward zero, consistent with the microsecond parsing path).
+   */
+  private def truncateNanosWithinMicro(nanosWithinMicro: Int, precision: Int): Short = {
+    val factor = precision match {
+      case 7 => 100
+      case 8 => 10
+      case _ => 1
+    }
+    ((nanosWithinMicro / factor) * factor).toShort
+  }
+
+  /**
+   * Trims and parses a given UTF8 string into a [[TimestampNanosVal]] (epoch microseconds plus a
+   * sub-microsecond remainder in [0, 999]) for `TIMESTAMP_LTZ(precision)` with `precision` in
+   * [7, 9]. Fractional digits beyond `precision` are truncated. The return type is [[Option]] in
+   * order to distinguish between a valid zero value and null. Please refer to
+   * `parseTimestampString` for the allowed formats.
+   */
+  def stringToTimestampLTZNanos(
+      s: UTF8String,
+      precision: Int,
+      timeZoneId: ZoneId): Option[TimestampNanosVal] = {
+    try {
+      val (segments, parsedZoneId, justTime) = parseTimestampString(s)
+      if (segments.isEmpty) {
+        return None
+      }
+      val zoneId = parsedZoneId.getOrElse(timeZoneId)
+      val nanoseconds = MICROSECONDS.toNanos(segments(6))
+      val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)
+      val localDate = if (justTime) {
+        LocalDate.now(zoneId)
+      } else {
+        LocalDate.of(segments(0), segments(1), segments(2))
+      }
+      val localDateTime = LocalDateTime.of(localDate, localTime)
+      val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
+      val instant = Instant.from(zonedDateTime)
+      val epochMicros = instantToMicros(instant)
+      Some(TimestampNanosVal.fromParts(
+        epochMicros, truncateNanosWithinMicro(segments(9), precision)))
+    } catch {
+      case NonFatal(_) => None
+    }
+  }
+
+  def stringToTimestampLTZNanosAnsi(
+      s: UTF8String,
+      precision: Int,
+      timeZoneId: ZoneId,
+      context: QueryContext = null): TimestampNanosVal = {
+    stringToTimestampLTZNanos(s, precision, timeZoneId).getOrElse {
+      throw ExecutionErrors.invalidInputInCastToDatetimeError(
+        s, TimestampLTZNanosType(precision), context)
+    }
+  }
+
+  /**
+   * Trims and parses a given UTF8 string into a [[TimestampNanosVal]] (epoch microseconds plus a
+   * sub-microsecond remainder in [0, 999]) for `TIMESTAMP_NTZ(precision)` with `precision` in
+   * [7, 9]. Fractional digits beyond `precision` are truncated. The result is independent of time
+   * zones; a time zone component is discarded when `allowTimeZone` is `true` and rejected (returns
+   * `None`) otherwise. The return type is [[Option]] in order to distinguish between a valid zero
+   * value and null. Please refer to `parseTimestampString` for the allowed formats.
+   */
+  def stringToTimestampNTZNanos(
+      s: UTF8String,
+      precision: Int,
+      allowTimeZone: Boolean = true): Option[TimestampNanosVal] = {
+    try {
+      val (segments, zoneIdOpt, justTime) = parseTimestampString(s)
+      if (segments.isEmpty || justTime || !allowTimeZone && zoneIdOpt.isDefined) {
+        return None
+      }
+      val nanoseconds = MICROSECONDS.toNanos(segments(6))
+      val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)
+      val localDate = LocalDate.of(segments(0), segments(1), segments(2))
+      val localDateTime = LocalDateTime.of(localDate, localTime)
+      val epochMicros = localDateTimeToMicros(localDateTime)
+      Some(TimestampNanosVal.fromParts(
+        epochMicros, truncateNanosWithinMicro(segments(9), precision)))
+    } catch {
+      case NonFatal(_) => None
+    }
+  }
+
+  def stringToTimestampNTZNanosAnsi(
+      s: UTF8String,
+      precision: Int,
+      context: QueryContext = null): TimestampNanosVal = {
+    stringToTimestampNTZNanos(s, precision).getOrElse {
+      throw ExecutionErrors.invalidInputInCastToDatetimeError(
+        s, TimestampNTZNanosType(precision), context)
+    }
+  }
+
   /**
    * Trims and parses a given UTF8 string to a corresponding [[Long]] value which representing the
    * number of microseconds since the midnight. The result will be independent of time zones.
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala
new file mode 100644
index 0000000000000..bba3ff576a5fa
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import java.time.{ZoneId, ZoneOffset}
+
+import org.apache.spark.{SparkDateTimeException, SparkFunSuite}
+import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
+import org.apache.spark.sql.catalyst.util.DateTimeUtils._
+import org.apache.spark.unsafe.types.{TimestampNanosVal, UTF8String}
+
+/**
+ * Tests for string-to-nanosecond timestamp parsing added under SPARK-57032. The parser keeps the
+ * microsecond part (fractional digits 1-6) and the sub-microsecond remainder (digits 7-9, in
+ * [0, 999]) and applies the target fractional precision `p` in [7, 9] by truncating extra digits.
+ */
+class TimestampNanosParseSuite extends SparkFunSuite {
+
+  private val losAngeles = getZoneId("America/Los_Angeles")
+
+  private def ntz(
+      str: String,
+      precision: Int,
+      allowTimeZone: Boolean = true): Option[TimestampNanosVal] = {
+    stringToTimestampNTZNanos(UTF8String.fromString(str), precision, allowTimeZone)
+  }
+
+  private def ltz(str: String, precision: Int, zoneId: ZoneId): Option[TimestampNanosVal] = {
+    stringToTimestampLTZNanos(UTF8String.fromString(str), precision, zoneId)
+  }
+
+  test("NTZ: fractional digits 7-9 are preserved as nanosWithinMicro") {
+    assert(ntz("2015-01-02 00:00:00.123456789", 9).get ===
+      TimestampNanosVal.fromParts(date(2015, 1, 2, 0, 0, 0, 123456, ZoneOffset.UTC), 789.toShort))
+    assert(ntz("2015-01-02 00:00:00.1234567", 9).get ===
+      TimestampNanosVal.fromParts(date(2015, 1, 2, 0, 0, 0, 123456, ZoneOffset.UTC), 700.toShort))
+    assert(ntz("2015-01-02 00:00:00.12345678", 9).get ===
+      TimestampNanosVal.fromParts(date(2015, 1, 2, 0, 0, 0, 123456, ZoneOffset.UTC), 780.toShort))
+  }
+
+  test("NTZ: precision truncates excess sub-microsecond digits toward zero") {
+    val micros = date(2020, 12, 31, 23, 59, 59, 123456, ZoneOffset.UTC)
+    assert(ntz("2020-12-31 23:59:59.123456789", 9).get ===
+      TimestampNanosVal.fromParts(micros, 789.toShort))
+    assert(ntz("2020-12-31 23:59:59.123456789", 8).get ===
+      TimestampNanosVal.fromParts(micros, 780.toShort))
+    assert(ntz("2020-12-31 23:59:59.123456789", 7).get ===
+      TimestampNanosVal.fromParts(micros, 700.toShort))
+  }
+
+  test("NTZ: digits beyond the 9th are dropped") {
+    val expected = TimestampNanosVal.fromParts(
+      date(2020, 12, 31, 23, 59, 59, 123456, ZoneOffset.UTC), 789.toShort)
+    assert(ntz("2020-12-31 23:59:59.1234567890", 9).get === expected)
+    assert(ntz("2020-12-31 23:59:59.123456789999", 9).get === expected)
+  }
+
+  test("NTZ: fewer than 6 fractional digits yield zero nanosWithinMicro") {
+    assert(ntz("2020-01-01 00:00:00.0", 9).get ===
+      TimestampNanosVal.fromParts(date(2020, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC), 0.toShort))
+    assert(ntz("2020-01-01 00:00:00.1", 9).get ===
+      TimestampNanosVal.fromParts(date(2020, 1, 1, 0, 0, 0, 100000, ZoneOffset.UTC), 0.toShort))
+    assert(ntz("2020-01-01 00:00:00.123456", 9).get ===
+      TimestampNanosVal.fromParts(date(2020, 1, 1, 0, 0, 0, 123456, ZoneOffset.UTC), 0.toShort))
+  }
+
+  test("NTZ: trailing zeros in the sub-microsecond part") {
+    assert(ntz("2015-01-02 00:00:00.000050000", 9).get ===
+      TimestampNanosVal.fromParts(date(2015, 1, 2, 0, 0, 0, 50, ZoneOffset.UTC), 0.toShort))
+    assert(ntz("2015-01-02 00:00:00.100000009", 9).get ===
+      TimestampNanosVal.fromParts(date(2015, 1, 2, 0, 0, 0, 100000, ZoneOffset.UTC), 9.toShort))
+  }
+
+  test("NTZ: maximum and minimum sub-microsecond fractions") {
+    assert(ntz("2020-06-15 12:00:00.999999999", 9).get ===
+      TimestampNanosVal.fromParts(date(2020, 6, 15, 12, 0, 0, 999999, ZoneOffset.UTC), 999.toShort))
+    assert(ntz("2020-06-15 12:00:00.000000001", 9).get ===
+      TimestampNanosVal.fromParts(date(2020, 6, 15, 12, 0, 0, 0, ZoneOffset.UTC), 1.toShort))
+    // ".000000001" loses its only sub-micro digit at precision 8 and 7.
+    assert(ntz("2020-06-15 12:00:00.000000001", 8).get.nanosWithinMicro === 0.toShort)
+    assert(ntz("2020-06-15 12:00:00.000000001", 7).get.nanosWithinMicro === 0.toShort)
+  }
+
+  test("NTZ: time zone component is discarded or rejected based on allowTimeZone") {
+    // With allowTimeZone = true (default) the zone suffix is discarded.
+    assert(ntz("2015-03-18T12:03:17.123456789Z", 9).get ===
+      TimestampNanosVal.fromParts(
+        date(2015, 3, 18, 12, 3, 17, 123456, ZoneOffset.UTC), 789.toShort))
+    // With allowTimeZone = false a zone suffix makes the input invalid.
+    assert(ntz("2015-03-18T12:03:17.123456789Z", 9, allowTimeZone = false).isEmpty)
+    // A time-only input cannot be parsed as TIMESTAMP_NTZ.
+    assert(ntz("12:03:17.123456789", 9).isEmpty)
+  }
+
+  test("LTZ: explicit zone offset in the string") {
+    val expected = TimestampNanosVal.fromParts(
+      date(2015, 3, 18, 12, 3, 17, 123456, getZoneId("+07:00")), 789.toShort)
+    assert(ltz("2015-03-18T12:03:17.123456789+07:00", 9, ZoneOffset.UTC).get === expected)
+  }
+
+  test("LTZ: region-based zone in the string") {
+    val expected = TimestampNanosVal.fromParts(
+      date(2015, 3, 18, 12, 3, 17, 123456, getZoneId("Europe/Moscow")), 789.toShort)
+    assert(ltz("2015-03-18T12:03:17.123456789 Europe/Moscow", 9, ZoneOffset.UTC).get === expected)
+  }
+
+  test("LTZ: falls back to the session zone when the string has no zone") {
+    val expected = TimestampNanosVal.fromParts(
+      date(2015, 3, 18, 12, 3, 17, 123456, losAngeles), 789.toShort)
+    assert(ltz("2015-03-18 12:03:17.123456789", 9, losAngeles).get === expected)
+  }
+
+  test("LTZ: precision truncation matches the NTZ path") {
+    val micros = date(2015, 3, 18, 12, 3, 17, 123456, ZoneOffset.UTC)
+    assert(ltz("2015-03-18T12:03:17.123456789Z", 7, ZoneOffset.UTC).get ===
+      TimestampNanosVal.fromParts(micros, 700.toShort))
+    assert(ltz("2015-03-18T12:03:17.123456789Z", 8, ZoneOffset.UTC).get ===
+      TimestampNanosVal.fromParts(micros, 780.toShort))
+  }
+
+  test("range edge cases with sub-microsecond fractions") {
+    // Unix epoch.
+    assert(ntz("1970-01-01 00:00:00.000000001", 9).get ===
+      TimestampNanosVal.fromParts(0L, 1.toShort))
+    // Julian/Gregorian cutover.
+    assert(ntz("1582-10-15 00:00:00.123456789", 9).get ===
+      TimestampNanosVal.fromParts(date(1582, 10, 15, 0, 0, 0, 123456, ZoneOffset.UTC), 789.toShort))
+    // End of the supported range.
+    assert(ntz("9999-12-31 23:59:59.999999999", 9).get ===
+      TimestampNanosVal.fromParts(
+        date(9999, 12, 31, 23, 59, 59, 999999, ZoneOffset.UTC), 999.toShort))
+  }
+
+  test("invalid inputs return None") {
+    assert(ntz("not a timestamp", 9).isEmpty)
+    assert(ntz("", 9).isEmpty)
+    assert(ltz("2015-13-40 99:99:99.123456789", 9, ZoneOffset.UTC).isEmpty)
+  }
+
+  test("ANSI variants throw on invalid input") {
+    val ntzValid = stringToTimestampNTZNanosAnsi(
+      UTF8String.fromString("2015-01-02 00:00:00.123456789"), 9)
+    assert(ntzValid ===
+      TimestampNanosVal.fromParts(date(2015, 1, 2, 0, 0, 0, 123456, ZoneOffset.UTC), 789.toShort))
+
+    val ltzValid = stringToTimestampLTZNanosAnsi(
+      UTF8String.fromString("2015-01-02 00:00:00.123456789Z"), 9, ZoneOffset.UTC)
+    assert(ltzValid ===
+      TimestampNanosVal.fromParts(date(2015, 1, 2, 0, 0, 0, 123456, ZoneOffset.UTC), 789.toShort))
+
+    intercept[SparkDateTimeException] {
+      stringToTimestampNTZNanosAnsi(UTF8String.fromString("invalid"), 9)
+    }
+    intercept[SparkDateTimeException] {
+      stringToTimestampLTZNanosAnsi(UTF8String.fromString("invalid"), 9, ZoneOffset.UTC)
+    }
+  }
+}

From 95f7e9edce822cd336e2fbe895a40cd24ac74944 Mon Sep 17 00:00:00 2001
From: Max Gekk <max.gekk@gmail.com>
Date: Fri, 29 May 2026 15:42:37 +0200
Subject: [PATCH 2/4] Fix coding style

---
 .../catalyst/util/SparkDateTimeUtils.scala    | 32 +++++++++++--------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
index 4f04e827ab522..0bab4c75184d2 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
@@ -756,8 +756,8 @@ trait SparkDateTimeUtils {
 
   /**
    * Trims and parses a given UTF8 string into a [[TimestampNanosVal]] (epoch microseconds plus a
-   * sub-microsecond remainder in [0, 999]) for `TIMESTAMP_LTZ(precision)` with `precision` in
-   * [7, 9]. Fractional digits beyond `precision` are truncated. The return type is [[Option]] in
+   * sub-microsecond remainder in [0, 999]) for `TIMESTAMP_LTZ(precision)` with `precision` in [7,
+   * 9]. Fractional digits beyond `precision` are truncated. The return type is [[Option]] in
    * order to distinguish between a valid zero value and null. Please refer to
    * `parseTimestampString` for the allowed formats.
    */
@@ -782,8 +782,9 @@ trait SparkDateTimeUtils {
       val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
       val instant = Instant.from(zonedDateTime)
       val epochMicros = instantToMicros(instant)
-      Some(TimestampNanosVal.fromParts(
-        epochMicros, truncateNanosWithinMicro(segments(9), precision)))
+      Some(
+        TimestampNanosVal
+          .fromParts(epochMicros, truncateNanosWithinMicro(segments(9), precision)))
     } catch {
       case NonFatal(_) => None
     }
@@ -796,17 +797,19 @@ trait SparkDateTimeUtils {
       context: QueryContext = null): TimestampNanosVal = {
     stringToTimestampLTZNanos(s, precision, timeZoneId).getOrElse {
       throw ExecutionErrors.invalidInputInCastToDatetimeError(
-        s, TimestampLTZNanosType(precision), context)
+        s,
+        TimestampLTZNanosType(precision),
+        context)
     }
   }
 
   /**
    * Trims and parses a given UTF8 string into a [[TimestampNanosVal]] (epoch microseconds plus a
-   * sub-microsecond remainder in [0, 999]) for `TIMESTAMP_NTZ(precision)` with `precision` in
-   * [7, 9]. Fractional digits beyond `precision` are truncated. The result is independent of time
-   * zones; a time zone component is discarded when `allowTimeZone` is `true` and rejected (returns
-   * `None`) otherwise. The return type is [[Option]] in order to distinguish between a valid zero
-   * value and null. Please refer to `parseTimestampString` for the allowed formats.
+   * sub-microsecond remainder in [0, 999]) for `TIMESTAMP_NTZ(precision)` with `precision` in [7,
+   * 9]. Fractional digits beyond `precision` are truncated. The result is independent of time
+   * zones; a time zone component is discarded when `allowTimeZone` is `true` and rejected
+   * (returns `None`) otherwise. The return type is [[Option]] in order to distinguish between a
+   * valid zero value and null. Please refer to `parseTimestampString` for the allowed formats.
    */
   def stringToTimestampNTZNanos(
       s: UTF8String,
@@ -822,8 +825,9 @@ trait SparkDateTimeUtils {
       val localDate = LocalDate.of(segments(0), segments(1), segments(2))
       val localDateTime = LocalDateTime.of(localDate, localTime)
       val epochMicros = localDateTimeToMicros(localDateTime)
-      Some(TimestampNanosVal.fromParts(
-        epochMicros, truncateNanosWithinMicro(segments(9), precision)))
+      Some(
+        TimestampNanosVal
+          .fromParts(epochMicros, truncateNanosWithinMicro(segments(9), precision)))
     } catch {
       case NonFatal(_) => None
     }
@@ -835,7 +839,9 @@ trait SparkDateTimeUtils {
       context: QueryContext = null): TimestampNanosVal = {
     stringToTimestampNTZNanos(s, precision).getOrElse {
       throw ExecutionErrors.invalidInputInCastToDatetimeError(
-        s, TimestampNTZNanosType(precision), context)
+        s,
+        TimestampNTZNanosType(precision),
+        context)
     }
   }
 

From 73bd3befcb9d179406022126262fce9597ede915 Mon Sep 17 00:00:00 2001
From: Max Gekk <max.gekk@gmail.com>
Date: Fri, 29 May 2026 16:16:03 +0200
Subject: [PATCH 3/4] Address review findings from SPARK-57032 nanos string
 parsing

- Fix stale `isValidDigits` comment (digits 7-9 are now retained, not truncated)
- Clarify segments(7-8) comment: values are written by loop as `i` advances
  but never read by any caller
- Extend format-string examples in `parseTimestampString` Scaladoc to show
  the optional [ns][ns][ns] digits
- Add precision guard (throws SparkException.internalError) before the
  try/catch in stringToTimestampLTZNanos and stringToTimestampNTZNanos,
  and explicit case 9 + error fallback in truncateNanosWithinMicro
- Add Scaladoc to stringToTimestampNTZNanosAnsi noting that allowTimeZone
  defaults to true (TZ suffix is discarded, not rejected)
- New tests: null input, time-only LTZ, pre-epoch negative timestamps,
  out-of-range precision (checkError / INTERNAL_ERROR), ANSI NTZ TZ-discard

Co-authored-by: Isaac
---
 .../catalyst/util/SparkDateTimeUtils.scala    | 35 +++++++++----
 .../util/TimestampNanosParseSuite.scala       | 50 ++++++++++++++++++-
 2 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
index 0bab4c75184d2..b5961268b7e04 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
@@ -26,7 +26,7 @@ import java.util.regex.Pattern
 
 import scala.util.control.NonFatal
 
-import org.apache.spark.QueryContext
+import org.apache.spark.{QueryContext, SparkException}
 import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.RebaseDateTime.{rebaseGregorianToJulianDays, rebaseGregorianToJulianMicros, rebaseJulianToGregorianDays, rebaseJulianToGregorianMicros}
 import org.apache.spark.sql.errors.ExecutionErrors
@@ -474,10 +474,10 @@ trait SparkDateTimeUtils {
    * order to distinguish between 0L and null. The following formats are allowed:
    *
    * `[+-]yyyy*` `[+-]yyyy*-[m]m` `[+-]yyyy*-[m]m-[d]d` `[+-]yyyy*-[m]m-[d]d `
-   * `[+-]yyyy*-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
-   * `[+-]yyyy*-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
-   * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
-   * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
+   * `[+-]yyyy*-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][ns][ns][ns][zone_id]`
+   * `[+-]yyyy*-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][ns][ns][ns][zone_id]`
+   * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][ns][ns][ns][zone_id]`
+   * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][ns][ns][ns][zone_id]`
    *
    * where `zone_id` should have one of the forms:
    *   - Z - Zulu time zone UTC+0
@@ -504,7 +504,8 @@ trait SparkDateTimeUtils {
     def isValidDigits(segment: Int, digits: Int): Boolean = {
       // A Long is able to represent a timestamp within [+-]200 thousand years
       val maxDigitsYear = 6
-      // For the nanosecond part, more than 6 digits is allowed, but will be truncated.
+      // Fractional digits 1-6 form microseconds; digits 7-9 are retained as the sub-microsecond
+      // remainder in segments(9); only digits beyond the 9th are dropped.
       segment == 6 || (segment == 0 && digits >= 4 && digits <= maxDigitsYear) ||
       // For the zoneId segment(7), it's could be zero digits when it's a region-based zone ID
       (segment == 7 && digits <= 2) ||
@@ -516,8 +517,9 @@ trait SparkDateTimeUtils {
     var tz: Option[String] = None
     // Indices 0-6 hold year, month, day, hour, minute, second and the microsecond part of the
     // fractional second (digits 1-6). Index 9 is an output-only slot that holds the
-    // sub-microsecond remainder (fractional digits 7-9) as a value in [0, 999]; it is not touched
-    // by the parsing loop below. Indices 7-8 are used while validating a region-based zone id.
+    // sub-microsecond remainder (fractional digits 7-9) as a value in [0, 999]; it is never
+    // written by the parsing loop below. Indices 7-8 are written by the loop as `i` advances
+    // but their values are never read by any caller.
     val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
     var i = 0
     var currentSegmentValue = 0
@@ -749,7 +751,10 @@ trait SparkDateTimeUtils {
     val factor = precision match {
       case 7 => 100
       case 8 => 10
-      case _ => 1
+      case 9 => 1
+      case _ =>
+        throw SparkException.internalError(
+          s"truncateNanosWithinMicro called with precision $precision outside [7, 9]")
     }
     ((nanosWithinMicro / factor) * factor).toShort
   }
@@ -765,6 +770,9 @@ trait SparkDateTimeUtils {
       s: UTF8String,
       precision: Int,
       timeZoneId: ZoneId): Option[TimestampNanosVal] = {
+    if (precision < 7 || precision > 9)
+      throw SparkException.internalError(
+        s"stringToTimestampLTZNanos: precision $precision is out of range [7, 9]")
     try {
       val (segments, parsedZoneId, justTime) = parseTimestampString(s)
       if (segments.isEmpty) {
@@ -815,6 +823,9 @@ trait SparkDateTimeUtils {
       s: UTF8String,
       precision: Int,
       allowTimeZone: Boolean = true): Option[TimestampNanosVal] = {
+    if (precision < 7 || precision > 9)
+      throw SparkException.internalError(
+        s"stringToTimestampNTZNanos: precision $precision is out of range [7, 9]")
     try {
       val (segments, zoneIdOpt, justTime) = parseTimestampString(s)
       if (segments.isEmpty || justTime || !allowTimeZone && zoneIdOpt.isDefined) {
@@ -833,6 +844,12 @@ trait SparkDateTimeUtils {
     }
   }
 
+  /**
+   * ANSI variant of [[stringToTimestampNTZNanos]]. Throws [[org.apache.spark.SparkDateTimeException]]
+   * on invalid input. Uses `allowTimeZone = true`: a time zone component in the string is silently
+   * discarded rather than rejected. Callers that need strict NTZ rejection should call
+   * [[stringToTimestampNTZNanos]] directly with `allowTimeZone = false`.
+   */
   def stringToTimestampNTZNanosAnsi(
       s: UTF8String,
       precision: Int,
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala
index bba3ff576a5fa..4b31e38f0f905 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util
 
 import java.time.{ZoneId, ZoneOffset}
 
-import org.apache.spark.{SparkDateTimeException, SparkFunSuite}
+import org.apache.spark.{SparkDateTimeException, SparkException, SparkFunSuite}
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils._
 import org.apache.spark.unsafe.types.{TimestampNanosVal, UTF8String}
@@ -146,12 +146,60 @@ class TimestampNanosParseSuite extends SparkFunSuite {
         date(9999, 12, 31, 23, 59, 59, 999999, ZoneOffset.UTC), 999.toShort))
   }
 
+  test("null input returns None") {
+    assert(stringToTimestampNTZNanos(null, 9).isEmpty)
+    assert(stringToTimestampLTZNanos(null, 9, ZoneOffset.UTC).isEmpty)
+  }
+
   test("invalid inputs return None") {
     assert(ntz("not a timestamp", 9).isEmpty)
     assert(ntz("", 9).isEmpty)
     assert(ltz("2015-13-40 99:99:99.123456789", 9, ZoneOffset.UTC).isEmpty)
   }
 
+  test("LTZ: time-only input uses the session zone's current date") {
+    // Time-only strings are accepted by the LTZ path (date is filled with LocalDate.now);
+    // they are rejected by the NTZ path because the date is indeterminate.
+    val result = ltz("12:03:17.123456789", 9, ZoneOffset.UTC)
+    assert(result.isDefined)
+    assert(result.get.nanosWithinMicro === 789.toShort)
+    assert(ntz("12:03:17.123456789", 9).isEmpty)
+  }
+
+  test("pre-epoch (negative) timestamps with sub-microsecond fractions") {
+    // Exercises the yearSign path together with segments(9).
+    assert(ntz("-0001-01-01 00:00:00.000000001", 9).get ===
+      TimestampNanosVal.fromParts(
+        date(-1, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC), 1.toShort))
+    assert(ntz("1582-10-14 23:59:59.999999999", 9).get ===
+      TimestampNanosVal.fromParts(
+        date(1582, 10, 14, 23, 59, 59, 999999, ZoneOffset.UTC), 999.toShort))
+  }
+
+  test("stringToTimestampNTZNanos throws internalError for out-of-range precision") {
+    // Precision outside [7, 9] is a caller bug, reported by the parse entry point itself.
+    Seq(0, 6, 10, -1).foreach { p =>
+      checkError(
+        exception = intercept[SparkException] {
+          stringToTimestampNTZNanos(
+            UTF8String.fromString("2020-01-01 00:00:00.123456789"), p)
+        },
+        condition = "INTERNAL_ERROR",
+        parameters = Map(
+          "message" -> s"stringToTimestampNTZNanos: precision $p is out of range [7, 9]"))
+    }
+  }
+
+  test("ANSI NTZ: time zone component in the string is silently discarded") {
+    // allowTimeZone defaults to true in the ANSI variant: the zone suffix is dropped, not
+    // rejected. Callers that need strict rejection must use stringToTimestampNTZNanos directly
+    // with allowTimeZone = false.
+    val result = stringToTimestampNTZNanosAnsi(
+      UTF8String.fromString("2015-03-18T12:03:17.123456789Z"), 9)
+    assert(result ===
+      TimestampNanosVal.fromParts(date(2015, 3, 18, 12, 3, 17, 123456, ZoneOffset.UTC), 789.toShort))
+  }
+
   test("ANSI variants throw on invalid input") {
     val ntzValid = stringToTimestampNTZNanosAnsi(
       UTF8String.fromString("2015-01-02 00:00:00.123456789"), 9)

From a3aafc154ea46d91c05850d006aec354a61c4e08 Mon Sep 17 00:00:00 2001
From: Max Gekk <max.gekk@gmail.com>
Date: Fri, 29 May 2026 20:21:07 +0200
Subject: [PATCH 4/4] Fix scalastyle violations in nanos string parsing code

Co-authored-by: Isaac
---
 .../sql/catalyst/util/SparkDateTimeUtils.scala    | 15 +++++++++------
 .../catalyst/util/TimestampNanosParseSuite.scala  |  3 ++-
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
index b5961268b7e04..09180b1dc97b5 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
@@ -770,9 +770,10 @@ trait SparkDateTimeUtils {
       s: UTF8String,
       precision: Int,
       timeZoneId: ZoneId): Option[TimestampNanosVal] = {
-    if (precision < 7 || precision > 9)
+    if (precision < 7 || precision > 9) {
       throw SparkException.internalError(
         s"stringToTimestampLTZNanos: precision $precision is out of range [7, 9]")
+    }
     try {
       val (segments, parsedZoneId, justTime) = parseTimestampString(s)
       if (segments.isEmpty) {
@@ -823,9 +824,10 @@ trait SparkDateTimeUtils {
       s: UTF8String,
       precision: Int,
       allowTimeZone: Boolean = true): Option[TimestampNanosVal] = {
-    if (precision < 7 || precision > 9)
+    if (precision < 7 || precision > 9) {
       throw SparkException.internalError(
         s"stringToTimestampNTZNanos: precision $precision is out of range [7, 9]")
+    }
     try {
       val (segments, zoneIdOpt, justTime) = parseTimestampString(s)
       if (segments.isEmpty || justTime || !allowTimeZone && zoneIdOpt.isDefined) {
@@ -845,10 +847,11 @@ trait SparkDateTimeUtils {
   }
 
   /**
-   * ANSI variant of [[stringToTimestampNTZNanos]]. Throws [[org.apache.spark.SparkDateTimeException]]
-   * on invalid input. Uses `allowTimeZone = true`: a time zone component in the string is silently
-   * discarded rather than rejected. Callers that need strict NTZ rejection should call
-   * [[stringToTimestampNTZNanos]] directly with `allowTimeZone = false`.
+   * ANSI variant of [[stringToTimestampNTZNanos]]. Throws
+   * [[org.apache.spark.SparkDateTimeException]] on invalid input. Uses `allowTimeZone = true`: a
+   * time zone component in the string is silently discarded rather than rejected. Callers that
+   * need strict NTZ rejection should call [[stringToTimestampNTZNanos]] directly with
+   * `allowTimeZone = false`.
    */
   def stringToTimestampNTZNanosAnsi(
       s: UTF8String,
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala
index 4b31e38f0f905..e5e8f05c69542 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosParseSuite.scala
@@ -197,7 +197,8 @@ class TimestampNanosParseSuite extends SparkFunSuite {
     val result = stringToTimestampNTZNanosAnsi(
       UTF8String.fromString("2015-03-18T12:03:17.123456789Z"), 9)
     assert(result ===
-      TimestampNanosVal.fromParts(date(2015, 3, 18, 12, 3, 17, 123456, ZoneOffset.UTC), 789.toShort))
+      TimestampNanosVal.fromParts(
+        date(2015, 3, 18, 12, 3, 17, 123456, ZoneOffset.UTC), 789.toShort))
   }
 
   test("ANSI variants throw on invalid input") {