This is an automated email from the ASF dual-hosted git repository.

MaxGekk pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.x by this push:
     new addbe79d21b9 [SPARK-57163][SQL] Map TIMESTAMP_LTZ(6) and 
TIMESTAMP_NTZ(6) to TimestampType and TimestampNTZType
addbe79d21b9 is described below

commit addbe79d21b91a5255535d4540c24c5bcdda2bfa
Author: Maxim Gekk <[email protected]>
AuthorDate: Thu Jun 4 07:47:05 2026 +0200

    [SPARK-57163][SQL] Map TIMESTAMP_LTZ(6) and TIMESTAMP_NTZ(6) to 
TimestampType and TimestampNTZType
    
    ### What changes were proposed in this pull request?
    
    Map the microsecond fractional precision `6` of the parameterized timestamp 
spellings to the existing GA timestamp types:
    
    - `TIMESTAMP_NTZ(6)` -> `TimestampNTZType`
    - `TIMESTAMP_LTZ(6)` -> `TimestampType`
    - `TIMESTAMP(6) WITHOUT TIME ZONE` -> `TimestampNTZType`
    - `TIMESTAMP(6) WITH LOCAL TIME ZONE` -> `TimestampType`
    - `TIMESTAMP(6)` -> the session default type (`spark.sql.timestampType`)
    
    Both parsing surfaces are updated to short-circuit precision `6` to the GA 
types before the preview-flag check:
    
    1. `sql/api/.../catalyst/parser/DataTypeAstBuilder.scala` - the SQL DDL 
parser (`parseTimestampLtzNanosPrecision` / `parseTimestampNtzNanosPrecision`, 
which also back the bare and zoned `TIMESTAMP(p)` cases).
    2. `sql/api/.../sql/types/DataType.scala` - `nameToType`, the 
typeName/JSON-string parser (`TIMESTAMP_LTZ_NANOS_TYPE` / 
`TIMESTAMP_NTZ_NANOS_TYPE` branches).
    
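    Precision `6` is accepted regardless of 
`spark.sql.timestampNanosTypes.enabled`, since it resolves to a GA type. 
Precision in `[7, 9]` keeps mapping to the nanosecond-capable types and still 
requires the preview flag. Every other precision (including non-numeric or 
overflowing values) throws `INVALID_TIMESTAMP_PRECISION`; because the range 
check now runs before the preview-flag check, this holds even when the flag is 
off. The `INVALID_TIMESTAMP_PRECISION` error message is updated to reflect that 
`6` is now valid.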
    
    Out of scope: precision in `[0, 5]` (rounding/truncation semantics, left 
for a follow-up) and any change to the nanosecond-capable types (`[7, 9]`).
    
    ### Why are the changes needed?
    
    This is a sub-task of SPARK-56822 (SPIP: Timestamps with nanosecond 
precision). Microsecond precision (p = 6) is exactly what `TimestampType` and 
`TimestampNTZType` already model. Previously `TIMESTAMP_NTZ(6)` / 
`TIMESTAMP_LTZ(6)` were rejected with `INVALID_TIMESTAMP_PRECISION`, which is 
surprising: an explicit `(6)` should be accepted and resolve to the equivalent 
microsecond type, giving users a consistent precision model where p = 6 means 
microseconds.
    
    ### Does this PR introduce _any_ user-facing change?
    
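    Yes. Against unreleased master, `TIMESTAMP_NTZ(6)`, `TIMESTAMP_LTZ(6)`, 
`TIMESTAMP(6) WITHOUT TIME ZONE`, `TIMESTAMP(6) WITH LOCAL TIME ZONE`, and bare 
`TIMESTAMP(6)` are now accepted and resolve to the GA microsecond types instead 
of throwing `INVALID_TIMESTAMP_PRECISION`. In addition, an out-of-range 
precision now fails with `INVALID_TIMESTAMP_PRECISION` even when 
`spark.sql.timestampNanosTypes.enabled` is off, rather than with the 
preview-flag error, because the range check runs before the flag check. The 
`INVALID_TIMESTAMP_PRECISION` message is updated accordingly.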
    
    ### How was this patch tested?
    
    Updated and added positive/negative cases in `DataTypeParserSuite` and 
`DataTypeSuite`. Ran:
    
    ```
    build/sbt 'catalyst/testOnly *DataTypeParserSuite *DataTypeSuite'
    ```
    
    Also ran scalastyle for `sql-api` and `catalyst` (0 errors).
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Cursor 1.0
    
    Closes #56302 from MaxGekk/timestamp-6.
    
    Authored-by: Maxim Gekk <[email protected]>
    Signed-off-by: Max Gekk <[email protected]>
    (cherry picked from commit d8e1210838f677660facc41ae530f053c6a16a07)
    Signed-off-by: Max Gekk <[email protected]>
---
 .../src/main/resources/error/error-conditions.json |  2 +-
 .../sql/catalyst/parser/DataTypeAstBuilder.scala   | 44 +++++++++++----
 .../org/apache/spark/sql/types/DataType.scala      | 46 ++++++++++++----
 .../sql/catalyst/parser/DataTypeParserSuite.scala  | 64 ++++++++++++++--------
 .../org/apache/spark/sql/types/DataTypeSuite.scala | 43 +++++++++++++--
 5 files changed, 149 insertions(+), 50 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json 
b/common/utils/src/main/resources/error/error-conditions.json
index a16224a83341..734e0335472b 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -4946,7 +4946,7 @@
   },
   "INVALID_TIMESTAMP_PRECISION" : {
     "message" : [
-      "The seconds precision <precision> of <type> is invalid. Expected an 
integer in [7, 9], or parameterless <type> for precision <= 6."
+      "The seconds precision <precision> of <type> is invalid. Expected an 
integer in [7, 9] for nanosecond precision; use precision 6 or parameterless 
<type> for the standard microsecond type."
     ],
     "sqlState" : "22023"
   },
diff --git 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
 
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
index 9de6aceb757b..4749d920e593 100644
--- 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
+++ 
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
@@ -477,22 +477,42 @@ class DataTypeAstBuilder extends 
SqlBaseParserBaseVisitor[AnyRef] with DataTypeE
     }
   }
 
-  private def parseTimestampLtzNanosPrecision(precision: String): 
TimestampLTZNanosType = {
-    DataTypeErrors.checkTimestampNanosTypesEnabled()
-    try TimestampLTZNanosType(precision.toInt)
-    catch {
-      case _: NumberFormatException =>
-        throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_LTZ")
+  private def parseTimestampLtzNanosPrecision(precision: String): DataType = {
+    val p =
+      try precision.toInt
+      catch {
+        case _: NumberFormatException =>
+          throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_LTZ")
+      }
+    // Precision 6 (microseconds) maps to the GA type and is accepted 
regardless of the
+    // nanos timestamp types preview flag.
+    if (p == 6) return TimestampType
+    // Reject out-of-range precisions before the feature-flag check so the 
error is always
+    // INVALID_TIMESTAMP_PRECISION, not FEATURE_NOT_ENABLED.
+    if (p < TimestampLTZNanosType.MIN_PRECISION || p > 
TimestampLTZNanosType.MAX_PRECISION) {
+      throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_LTZ")
     }
+    DataTypeErrors.checkTimestampNanosTypesEnabled()
+    TimestampLTZNanosType(p)
   }
 
-  private def parseTimestampNtzNanosPrecision(precision: String): 
TimestampNTZNanosType = {
-    DataTypeErrors.checkTimestampNanosTypesEnabled()
-    try TimestampNTZNanosType(precision.toInt)
-    catch {
-      case _: NumberFormatException =>
-        throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_NTZ")
+  private def parseTimestampNtzNanosPrecision(precision: String): DataType = {
+    val p =
+      try precision.toInt
+      catch {
+        case _: NumberFormatException =>
+          throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_NTZ")
+      }
+    // Precision 6 (microseconds) maps to the GA type and is accepted 
regardless of the
+    // nanos timestamp types preview flag.
+    if (p == 6) return TimestampNTZType
+    // Reject out-of-range precisions before the feature-flag check so the 
error is always
+    // INVALID_TIMESTAMP_PRECISION, not FEATURE_NOT_ENABLED.
+    if (p < TimestampNTZNanosType.MIN_PRECISION || p > 
TimestampNTZNanosType.MAX_PRECISION) {
+      throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_NTZ")
     }
+    DataTypeErrors.checkTimestampNanosTypesEnabled()
+    TimestampNTZNanosType(p)
   }
 
   /**
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
index c1d1430bacee..319d600788fa 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -236,18 +236,44 @@ object DataType {
       // For backwards compatibility, previously the type name of NullType is 
"null"
       case "null" => NullType
       case TIMESTAMP_LTZ_NANOS_TYPE(precision) =>
-        DataTypeErrors.checkTimestampNanosTypesEnabled()
-        try TimestampLTZNanosType(precision.toInt)
-        catch {
-          case _: NumberFormatException =>
-            throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_LTZ")
+        val p =
+          try precision.toInt
+          catch {
+            case _: NumberFormatException =>
+              throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_LTZ")
+          }
+        // Precision 6 (microseconds) maps to the GA type and is accepted 
regardless of the
+        // nanos timestamp types preview flag.
+        if (p == 6) {
+          TimestampType
+        } else if (p < TimestampLTZNanosType.MIN_PRECISION ||
+          p > TimestampLTZNanosType.MAX_PRECISION) {
+          // Reject out-of-range precisions before the feature-flag check so 
the error is always
+          // INVALID_TIMESTAMP_PRECISION, not FEATURE_NOT_ENABLED.
+          throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_LTZ")
+        } else {
+          DataTypeErrors.checkTimestampNanosTypesEnabled()
+          TimestampLTZNanosType(p)
         }
       case TIMESTAMP_NTZ_NANOS_TYPE(precision) =>
-        DataTypeErrors.checkTimestampNanosTypesEnabled()
-        try TimestampNTZNanosType(precision.toInt)
-        catch {
-          case _: NumberFormatException =>
-            throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_NTZ")
+        val p =
+          try precision.toInt
+          catch {
+            case _: NumberFormatException =>
+              throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_NTZ")
+          }
+        // Precision 6 (microseconds) maps to the GA type and is accepted 
regardless of the
+        // nanos timestamp types preview flag.
+        if (p == 6) {
+          TimestampNTZType
+        } else if (p < TimestampNTZNanosType.MIN_PRECISION ||
+          p > TimestampNTZNanosType.MAX_PRECISION) {
+          // Reject out-of-range precisions before the feature-flag check so 
the error is always
+          // INVALID_TIMESTAMP_PRECISION, not FEATURE_NOT_ENABLED.
+          throw DataTypeErrors.invalidTimestampPrecisionError(precision, 
"TIMESTAMP_NTZ")
+        } else {
+          DataTypeErrors.checkTimestampNanosTypesEnabled()
+          TimestampNTZNanosType(p)
         }
       case "timestamp_ltz" => TimestampType
       case other =>
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
index 6543b209ccd8..4bb243bfe464 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -68,6 +68,11 @@ class DataTypeParserSuite extends SparkFunSuite with 
SQLHelper {
   checkDataType("TIMESTAMP WITHOUT TIME ZONE", TimestampNTZType)
   checkDataType("timestamp_ntz", TimestampNTZType)
   checkDataType("timestamp_ltz", TimestampType)
+  // Precision 6 (microseconds) maps to the GA types and needs no preview flag.
+  checkDataType("TIMESTAMP_NTZ(6)", TimestampNTZType)
+  checkDataType("TIMESTAMP_LTZ(6)", TimestampType)
+  checkDataType("TIMESTAMP(6) WITHOUT TIME ZONE", TimestampNTZType)
+  checkDataType("TIMESTAMP(6) WITH LOCAL TIME ZONE", TimestampType)
   checkDataType("string", StringType)
   checkDataType("ChaR(5)", CharType(5))
   checkDataType("ChaRacter(5)", CharType(5))
@@ -161,35 +166,35 @@ class DataTypeParserSuite extends SparkFunSuite with 
SQLHelper {
       assert(parse("timestamp") === TimestampNTZType)
       assert(parse("timestamp with local time zone") === TimestampType)
       assert(parse("timestamp without time zone") === TimestampNTZType)
+      // Bare TIMESTAMP(6) resolves to the session default GA type, no preview 
flag needed.
+      assert(parse("timestamp(6)") === TimestampNTZType)
       withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
         assert(parse("timestamp(9)") === TimestampNTZNanosType(9))
         // Bare TIMESTAMP(p) routes through SqlApiConf.get.timestampType, so an
         // out-of-range precision must surface as the NTZ error here.
-        Seq("6", "10").foreach { p =>
-          checkError(
-            exception = intercept[SparkException] {
-              CatalystSqlParser.parseDataType(s"timestamp($p)")
-            },
-            condition = "INVALID_TIMESTAMP_PRECISION",
-            parameters = Map("precision" -> p, "type" -> "TIMESTAMP_NTZ"))
-        }
+        checkError(
+          exception = intercept[SparkException] {
+            CatalystSqlParser.parseDataType("timestamp(10)")
+          },
+          condition = "INVALID_TIMESTAMP_PRECISION",
+          parameters = Map("precision" -> "10", "type" -> "TIMESTAMP_NTZ"))
       }
     }
     withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> 
TimestampTypes.TIMESTAMP_LTZ.toString) {
       assert(parse("timestamp") === TimestampType)
       assert(parse("timestamp with local time zone") === TimestampType)
       assert(parse("timestamp without time zone") === TimestampNTZType)
+      // Bare TIMESTAMP(6) resolves to the session default GA type, no preview 
flag needed.
+      assert(parse("timestamp(6)") === TimestampType)
       withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
         assert(parse("timestamp(9)") === TimestampLTZNanosType(9))
         // Bare TIMESTAMP(p) under LTZ default must surface as the LTZ error.
-        Seq("6", "10").foreach { p =>
-          checkError(
-            exception = intercept[SparkException] {
-              CatalystSqlParser.parseDataType(s"timestamp($p)")
-            },
-            condition = "INVALID_TIMESTAMP_PRECISION",
-            parameters = Map("precision" -> p, "type" -> "TIMESTAMP_LTZ"))
-        }
+        checkError(
+          exception = intercept[SparkException] {
+            CatalystSqlParser.parseDataType("timestamp(10)")
+          },
+          condition = "INVALID_TIMESTAMP_PRECISION",
+          parameters = Map("precision" -> "10", "type" -> "TIMESTAMP_LTZ"))
       }
     }
   }
@@ -235,6 +240,23 @@ class DataTypeParserSuite extends SparkFunSuite with 
SQLHelper {
       assert(parse("TIMESTAMP_LTZ") === TimestampType)
       assert(parse("TIMESTAMP WITHOUT TIME ZONE") === TimestampNTZType)
       assert(parse("TIMESTAMP WITH LOCAL TIME ZONE") === TimestampType)
+      // Precision 6 maps to the GA types and stays accepted with the gate off.
+      assert(parse("TIMESTAMP_NTZ(6)") === TimestampNTZType)
+      assert(parse("TIMESTAMP_LTZ(6)") === TimestampType)
+      assert(parse("TIMESTAMP(6) WITHOUT TIME ZONE") === TimestampNTZType)
+      assert(parse("TIMESTAMP(6) WITH LOCAL TIME ZONE") === TimestampType)
+      // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION 
regardless of the flag.
+      Seq("TIMESTAMP_NTZ" -> "TIMESTAMP_NTZ", "TIMESTAMP_LTZ" -> 
"TIMESTAMP_LTZ").foreach {
+        case (spelling, errorType) =>
+          Seq(0, 1, 5, 10, 99).foreach { p =>
+            checkError(
+              exception = intercept[SparkException] {
+                CatalystSqlParser.parseDataType(s"$spelling($p)")
+              },
+              condition = "INVALID_TIMESTAMP_PRECISION",
+              parameters = Map("precision" -> p.toString, "type" -> errorType))
+          }
+      }
     }
   }
 
@@ -315,7 +337,9 @@ class DataTypeParserSuite extends SparkFunSuite with 
SQLHelper {
     withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
       Seq("TIMESTAMP_NTZ" -> "TIMESTAMP_NTZ", "TIMESTAMP_LTZ" -> 
"TIMESTAMP_LTZ").foreach {
         case (spelling, errorType) =>
-          Seq(0, 1, 6, 10, 99).foreach { p =>
+          // Precision 6 is valid (maps to the GA type); only [0, 5] and [10, 
...] are invalid.
+          // Precision 5 is included to pin the lower boundary of the p=6 
carve-out.
+          Seq(0, 1, 5, 10, 99).foreach { p =>
             checkError(
               exception = intercept[SparkException] {
                 CatalystSqlParser.parseDataType(s"$spelling($p)")
@@ -332,12 +356,6 @@ class DataTypeParserSuite extends SparkFunSuite with 
SQLHelper {
         condition = "INVALID_TIMESTAMP_PRECISION",
         parameters = Map("precision" -> "99999999999", "type" -> 
"TIMESTAMP_NTZ"))
       // TIMESTAMP(p) with zone aliases route to the corresponding nanos 
type's error.
-      checkError(
-        exception = intercept[SparkException] {
-          CatalystSqlParser.parseDataType("TIMESTAMP(6) WITHOUT TIME ZONE")
-        },
-        condition = "INVALID_TIMESTAMP_PRECISION",
-        parameters = Map("precision" -> "6", "type" -> "TIMESTAMP_NTZ"))
       checkError(
         exception = intercept[SparkException] {
           CatalystSqlParser.parseDataType("TIMESTAMP(10) WITH LOCAL TIME ZONE")
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
index f0c965f29889..0334e90af68f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
@@ -1517,10 +1517,11 @@ class DataTypeSuite extends SparkFunSuite with 
SQLHelper {
           assert(DataType.fromJson(s"""\"$name($n )\"""") === factory(n))
         }
 
-        // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION. The 
overflowing
-        // case verifies the original digit string is preserved instead of 
leaking
-        // NumberFormatException.
-        Seq("0", "6", "10", overflowing).foreach { p =>
+        // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION. 
Precision 6 is
+        // valid (maps to the GA type) and is covered separately. Precision 5 
is included
+        // to pin the lower boundary of the p=6 carve-out. The overflowing 
case verifies
+        // the original digit string is preserved instead of leaking 
NumberFormatException.
+        Seq("0", "5", "10", overflowing).foreach { p =>
           checkError(
             exception = intercept[SparkException] {
               DataType.fromJson(s"""\"$name($p)\"""")
@@ -1563,6 +1564,25 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper 
{
     assert(DataType.fromJson("\"timestamp_ntz\"") === TimestampNTZType)
   }
 
+  test("SPARK-57163: parse timestamp_*(6) as the GA microsecond types") {
+    // Precision 6 maps to the GA types regardless of the preview flag.
+    Seq("true", "false").foreach { flag =>
+      withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> flag) {
+        // Compact form and whitespace-tolerant forms (mirrors the nanos-type 
test pattern).
+        assert(DataType.fromJson("\"timestamp_ltz(6)\"") === TimestampType)
+        assert(DataType.fromJson("\"timestamp_ltz( 6)\"") === TimestampType)
+        assert(DataType.fromJson("\"timestamp_ltz(6 )\"") === TimestampType)
+        assert(DataType.fromJson("\"timestamp_ntz(6)\"") === TimestampNTZType)
+        assert(DataType.fromJson("\"timestamp_ntz( 6)\"") === TimestampNTZType)
+        assert(DataType.fromJson("\"timestamp_ntz(6 )\"") === TimestampNTZType)
+        assert(DataType.fromDDL("ts timestamp_ntz(6)") ===
+          StructType(Seq(StructField("ts", TimestampNTZType))))
+        assert(DataType.fromDDL("ts timestamp_ltz(6)") ===
+          StructType(Seq(StructField("ts", TimestampType))))
+      }
+    }
+  }
+
   test("SPARK-56965: JSON parser rejects nanos timestamp types when preview 
flag is off") {
     withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "false") {
       Seq(
@@ -1579,6 +1599,21 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper 
{
               "configKey" -> "spark.sql.timestampNanosTypes.enabled",
               "configValue" -> "true"))
       }
+      // Precision 6 maps to the GA types and stays accepted with the gate off.
+      assert(DataType.fromJson("\"timestamp_ltz(6)\"") === TimestampType)
+      assert(DataType.fromJson("\"timestamp_ntz(6)\"") === TimestampNTZType)
+      // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION 
regardless of the flag.
+      Seq("timestamp_ltz" -> "TIMESTAMP_LTZ", "timestamp_ntz" -> 
"TIMESTAMP_NTZ").foreach {
+        case (name, sqlTypeName) =>
+          Seq("0", "5", "10").foreach { p =>
+            checkError(
+              exception = intercept[SparkException] {
+                DataType.fromJson(s"""\"$name($p)\"""")
+              },
+              condition = "INVALID_TIMESTAMP_PRECISION",
+              parameters = Map("precision" -> p, "type" -> sqlTypeName))
+          }
+      }
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to