This is an automated email from the ASF dual-hosted git repository.

parthc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 5f37dfc2e test: Port DateTimeUtilsSuite timestamp format tests in Comet (#3780)
5f37dfc2e is described below

commit 5f37dfc2e756a61fc8d7e3a156f1e5d190c46b01
Author: Parth Chandra <[email protected]>
AuthorDate: Mon Mar 23 17:49:48 2026 -0700

    test: Port DateTimeUtilsSuite timestamp format tests in Comet (#3780)
    
    * test: Port DateTimeUtilsSuite timestamp format tests in Comet
---
 .github/workflows/pr_build_linux.yml               |   1 +
 .../org/apache/comet/CometDateTimeUtilsSuite.scala | 284 +++++++++++++++++++++
 2 files changed, 285 insertions(+)

diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml
index 24f3ed2fe..dfacdc294 100644
--- a/.github/workflows/pr_build_linux.yml
+++ b/.github/workflows/pr_build_linux.yml
@@ -330,6 +330,7 @@ jobs:
               org.apache.comet.CometTemporalExpressionSuite
               org.apache.comet.CometArrayExpressionSuite
               org.apache.comet.CometCastSuite
+              org.apache.comet.CometDateTimeUtilsSuite
               org.apache.comet.CometMathExpressionSuite
               org.apache.comet.CometStringExpressionSuite
               org.apache.comet.CometBitwiseExpressionSuite
diff --git a/spark/src/test/scala/org/apache/comet/CometDateTimeUtilsSuite.scala b/spark/src/test/scala/org/apache/comet/CometDateTimeUtilsSuite.scala
new file mode 100644
index 000000000..770766134
--- /dev/null
+++ b/spark/src/test/scala/org/apache/comet/CometDateTimeUtilsSuite.scala
@@ -0,0 +1,284 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet
+
+import java.io.File
+
+import org.apache.spark.sql.{CometTestBase, DataFrame, SaveMode}
+import org.apache.spark.sql.functions.col
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.DataTypes
+
+/**
+ * Tests for string-to-timestamp (and string-to-date) cast correctness, ported from Spark's
+ * DateTimeUtilsSuite. That suite lives in sql/catalyst and does not extend CometTestBase, so it
+ * never runs in Comet CI.
+ */
+class CometDateTimeUtilsSuite extends CometTestBase {
+
+  import testImplicits._
+
+  private def roundtripParquet(df: DataFrame, tempDir: File): DataFrame = {
+    val filename = new File(tempDir, 
s"dtutils_${System.currentTimeMillis()}.parquet").toString
+    df.write.mode(SaveMode.Overwrite).parquet(filename)
+    spark.read.parquet(filename)
+  }
+
+  /**
+   * Writes values to Parquet (to prevent constant folding), casts the "a" column to
+   * TimestampType, and asserts that Comet produces the same result as Spark.
+   */
+  private def checkCastToTimestamp(values: Seq[String]): Unit = {
+    withTempPath { dir =>
+      val df = roundtripParquet(values.toDF("a"), dir).coalesce(1)
+      checkSparkAnswer(df.withColumn("ts", 
col("a").cast(DataTypes.TimestampType)))
+    }
+  }
+
+  /**
+   * Same as checkCastToTimestamp but casts the result to STRING before collecting. Use for
+   * extreme-year values (e.g. year 294247 or -290308) where collect() overflows in
+   * toJavaTimestamp due to Gregorian/Julian calendar rebasing in the test harness.
+   */
+  private def checkCastToTimestampAsString(values: Seq[String]): Unit = {
+    withTempPath { dir =>
+      val df = roundtripParquet(values.toDF("a"), dir).coalesce(1)
+      checkSparkAnswer(
+        df.withColumn("ts", 
col("a").cast(DataTypes.TimestampType).cast(DataTypes.StringType)))
+    }
+  }
+
+  private def checkCastToTimestampNTZ(values: Seq[String]): Unit = {
+    withTempPath { dir =>
+      val df = roundtripParquet(values.toDF("a"), dir).coalesce(1)
+      checkSparkAnswer(df.withColumn("ts", 
col("a").cast(DataTypes.TimestampNTZType)))
+    }
+  }
+
+  private def checkCastToDate(values: Seq[String]): Unit = {
+    withTempPath { dir =>
+      val df = roundtripParquet(values.toDF("a"), dir).coalesce(1)
+      checkSparkAnswer(df.withColumn("dt", col("a").cast(DataTypes.DateType)))
+    }
+  }
+
+  test("string to timestamp - basic date and datetime formats") {
+    // Run for a few representative session timezones instead of the ALL_TIMEZONES loop in
+    // Spark's DateTimeUtilsSuite. The exact UTC epoch depends on the session timezone for
+    // values without an embedded offset.
+    for (tz <- Seq("UTC", "America/Los_Angeles", "Asia/Kathmandu")) {
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz) {
+        checkCastToTimestamp(
+          Seq(
+            "1969-12-31 16:00:00",
+            "0001",
+            "2015-03",
+            // Date only; leading/trailing whitespace around the date is accepted
+            "2015-03-18",
+            "2015-03-18 ",
+            " 2015-03-18",
+            " 2015-03-18 ",
+            "2015-03-18 12:03:17",
+            "2015-03-18T12:03:17",
+            // Milliseconds
+            "2015-03-18 12:03:17.123",
+            "2015-03-18T12:03:17.123",
+            // Microseconds
+            "2015-03-18T12:03:17.123121",
+            "2015-03-18T12:03:17.12312",
+            // More than 6 fractional digits — truncated to microseconds
+            "2015-03-18T12:03:17.123456789",
+            // Time-only: bare HH:MM:SS
+            "18:12:15",
+            // Four fractional digits with trailing zeros (.1000 = 100 ms), not truncated
+            "2011-05-06 07:08:09.1000"))
+      }
+    }
+  }
+
+  test("string to timestamp - embedded timezone offsets") {
+    // When the string includes a timezone offset it determines the UTC epoch regardless of the
+    // session timezone. These cases exercise the offset-parsing logic.
+    checkCastToTimestamp(
+      Seq(
+        // ±HH:MM
+        "2015-03-18T12:03:17-13:53",
+        "2015-03-18T12:03:17-01:00",
+        "2015-03-18T12:03:17+07:30",
+        "2015-03-18T12:03:17+07:03",
+        // Z and UTC
+        "2015-03-18T12:03:17Z",
+        "2015-03-18 12:03:17Z",
+        "2015-03-18 12:03:17UTC",
+        // ±HHMM (no colon) — issue #3775
+        "2015-03-18T12:03:17-1353",
+        "2015-03-18T12:03:17-0100",
+        "2015-03-18T12:03:17+0730",
+        "2015-03-18T12:03:17+0703",
+        // ±H:MM or ±H:M (single-digit hour) — issue #3775
+        "2015-03-18T12:03:17-1:0",
+        // GMT±HH:MM — issue #3775
+        "2015-03-18T12:03:17GMT-13:53",
+        "2015-03-18T12:03:17GMT-01:00",
+        "2015-03-18T12:03:17 GMT+07:30",
+        "2015-03-18T12:03:17GMT+07:03",
+        // With milliseconds
+        "2015-03-18T12:03:17.456Z",
+        "2015-03-18 12:03:17.456Z",
+        "2015-03-18 12:03:17.456 UTC",
+        "2015-03-18T12:03:17.123-1:0",
+        "2015-03-18T12:03:17.123-01:00",
+        "2015-03-18T12:03:17.123 GMT-01:00",
+        "2015-03-18T12:03:17.123-0100",
+        "2015-03-18T12:03:17.123+07:30",
+        "2015-03-18T12:03:17.123 GMT+07:30",
+        "2015-03-18T12:03:17.123+0730",
+        "2015-03-18T12:03:17.123GMT+07:30",
+        // With microseconds
+        "2015-03-18T12:03:17.123121+7:30",
+        "2015-03-18T12:03:17.123121 GMT+0730",
+        "2015-03-18T12:03:17.12312+7:30",
+        "2015-03-18T12:03:17.12312 UT+07:30",
+        "2015-03-18T12:03:17.12312+0730",
+        // Nanoseconds truncated to microseconds
+        "2015-03-18T12:03:17.123456789+0:00",
+        "2015-03-18T12:03:17.123456789 UTC+0",
+        "2015-03-18T12:03:17.123456789GMT+00:00",
+        // Named timezone — issue #3775
+        "2015-03-18T12:03:17.123456 Europe/Moscow",
+        // T-prefixed time-only with offset
+        "T18:12:15.12312+7:30",
+        "T18:12:15.12312 UTC+07:30",
+        "T18:12:15.12312+0730",
+        // Bare time-only with offset
+        "18:12:15.12312+7:30",
+        "18:12:15.12312 GMT+07:30",
+        "18:12:15.12312+0730"))
+  }
+
+  test("string to timestamp - invalid formats return null") {
+    // All of these should produce null (not throw) in non-ANSI mode.
+    for (tz <- Seq("UTC", "America/Los_Angeles")) {
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz) {
+        checkCastToTimestamp(
+          Seq(
+            "238",
+            "2015-03-18 123142",
+            "2015-03-18T123123",
+            "2015-03-18X",
+            "2015/03/18",
+            "2015.03.18",
+            "20150318",
+            "2015-031-8",
+            "015-01-18",
+            "2015-03-18T12:03.17-20:0",
+            "2015-03-18T12:03.17-0:70",
+            "2015-03-18T12:03.17-1:0:0",
+            "1999 08 01",
+            "1999-08 01",
+            "1999 08",
+            "",
+            "    ",
+            "+",
+            "T",
+            "2015-03-18T",
+            "12::",
+            "2015-03-18T12:03:17-8:",
+            "2015-03-18T12:03:17-8:30:"))
+      }
+    }
+  }
+
+  // "SPARK-35780: support full range of timestamp string"
+  test("SPARK-35780: full range of timestamp string") {
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+      // Normal-range cases: collect() as TimestampType directly.
+      checkCastToTimestamp(
+        Seq(
+          // Negative year
+          "-1969-12-31 16:00:00",
+          // Zero-padded year
+          "02015-03-18 16:00:00",
+          "000001",
+          "-000001",
+          "00238",
+          // 5-digit year (within NaiveDate range)
+          "99999-03-01T12:03:17",
+          // These look like time-only but with a leading sign — invalid
+          "+12:12:12",
+          "-12:12:12",
+          // Empty / whitespace
+          "",
+          "    ",
+          "+",
+          // One microsecond past Long.MaxValue — overflows to null
+          "294247-01-10T04:00:54.775808Z",
+          // One microsecond before Long.MinValue — overflows to null
+          "-290308-12-21T19:59:05.224191Z",
+          // Integer overflow in individual fields
+          "4294967297",
+          "2021-4294967297-11",
+          "4294967297:30:00",
+          "2021-11-4294967297T12:30:00",
+          "2021-01-01T12:4294967297:00",
+          "2021-01-01T12:30:4294967297",
+          "2021-01-01T12:30:4294967297.123456",
+          "2021-01-01T12:30:4294967297+07:30",
+          "2021-01-01T12:30:4294967297UTC",
+          "2021-01-01T12:30:4294967297+4294967297:30"))
+
+      // Extreme-year boundary cases: collecting a TimestampType value for year 294247 or -290308
+      // overflows in toJavaTimestamp due to Gregorian/Julian rebasing in the test harness.
+      // Cast to STRING first to avoid that while still verifying correct parsing.
+      checkCastToTimestampAsString(
+        Seq(
+          // Long.MaxValue boundary — valid, equals Long.MaxValue microseconds
+          "294247-01-10T04:00:54.775807Z",
+          // Long.MinValue boundary — valid, equals Long.MinValue microseconds
+          "-290308-12-21T19:59:05.224192Z"))
+    }
+  }
+
+  test("SPARK-15379: invalid calendar dates in string to date cast") {
+    // Feb 29 on a non-leap year and Apr 31 must produce null for both DATE and TIMESTAMP.
+    checkCastToDate(Seq("2015-02-29 00:00:00", "2015-04-31 00:00:00", 
"2015-02-29", "2015-04-31"))
+
+    checkCastToTimestamp(
+      Seq("2015-02-29 00:00:00", "2015-04-31 00:00:00", "2015-02-29", 
"2015-04-31"))
+  }
+
+  test("trailing characters while converting string to timestamp") {
+    // Garbage after a valid ISO timestamp must make the whole value null.
+    checkCastToTimestamp(Seq("2019-10-31T10:59:23Z:::"))
+  }
+
+  test("SPARK-37326: cast string to TIMESTAMP_NTZ rejects timezone offsets") {
+    // A value with a timezone offset should be null for TIMESTAMP_NTZ.
+    checkCastToTimestampNTZ(
+      Seq(
+        // Has offset — null
+        "2021-11-22 10:54:27 +08:00",
+        // No offset — parsed as local datetime
+        "2021-11-22 10:54:27",
+        // More NTZ-compatible values
+        "2021-11-22",
+        "2021-11-22T10:54:27.123456"))
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to