This is an automated email from the ASF dual-hosted git repository.

mingliang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new a305ba43ab [GLUTEN-10397][VL] Add timestampadd support (#10400)
a305ba43ab is described below

commit a305ba43ab969199172134ad979a00b62e257901
Author: Mingliang Zhu <[email protected]>
AuthorDate: Thu Aug 14 07:31:28 2025 +0800

    [GLUTEN-10397][VL] Add timestampadd support (#10400)
---
 .../clickhouse/CHSparkPlanExecApi.scala            |  20 ++++
 .../expression/CHExpressionTransformer.scala       |  13 +++
 .../backendsapi/velox/VeloxSparkPlanExecApi.scala  |  14 +++
 .../functions/DateFunctionsValidateSuite.scala     |  14 +++
 .../gluten/backendsapi/SparkPlanExecApi.scala      |   6 ++
 .../DateTimeExpressionsTransformer.scala           |   3 +-
 .../gluten/expression/ExpressionConverter.scala    |  20 ++--
 .../utils/clickhouse/ClickHouseTestSettings.scala  |   1 +
 .../gluten/utils/velox/VeloxTestSettings.scala     |   3 +
 .../expressions/GlutenDateExpressionsSuite.scala   | 104 +++++++++++++++++++++
 .../utils/clickhouse/ClickHouseTestSettings.scala  |   1 +
 .../gluten/utils/velox/VeloxTestSettings.scala     |   3 +
 .../expressions/GlutenDateExpressionsSuite.scala   | 104 +++++++++++++++++++++
 .../gluten/sql/shims/spark34/Spark34Shims.scala    |   8 ++
 .../gluten/sql/shims/spark35/Spark35Shims.scala    |   8 ++
 15 files changed, 306 insertions(+), 16 deletions(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
index ce7d4e0c7b..2af5c3d71b 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -1006,4 +1006,24 @@ class CHSparkPlanExecApi extends SparkPlanExecApi with 
Logging {
       columnName) || (SparkVersionUtil.gteSpark35 && 
columnName.equalsIgnoreCase(
       "__delta_internal_is_row_deleted"))
   }
+
+  override def genTimestampAddTransformer(
+      substraitExprName: String,
+      left: ExpressionTransformer,
+      right: ExpressionTransformer,
+      original: Expression): ExpressionTransformer = {
+    // Since spark 3.3.0
+    val extract =
+      SparkShimLoader.getSparkShims.extractExpressionTimestampAddUnit(original)
+    if (extract.isEmpty) {
+      throw new UnsupportedOperationException(s"Not support expression 
TimestampAdd.")
+    }
+    CHTimestampAddTransformer(
+      substraitExprName,
+      extract.get.head,
+      left,
+      right,
+      extract.get.last,
+      original)
+  }
 }
diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
index c5111fef83..984fb2f790 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
@@ -294,3 +294,16 @@ case class CHArraySortTransformer(
     }
   }
 }
+
+case class CHTimestampAddTransformer(
+    substraitExprName: String,
+    unit: String,
+    left: ExpressionTransformer,
+    right: ExpressionTransformer,
+    timeZoneId: String,
+    original: Expression)
+  extends ExpressionTransformer {
+  override def children: Seq[ExpressionTransformer] = {
+    Seq(LiteralTransformer(unit), left, right, LiteralTransformer(timeZoneId))
+  }
+}
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index 12e2c80607..cafea465fd 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -967,6 +967,20 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
     VeloxColumnarToCarrierRowExec.enforce(plan)
   }
 
+  override def genTimestampAddTransformer(
+      substraitExprName: String,
+      left: ExpressionTransformer,
+      right: ExpressionTransformer,
+      original: Expression): ExpressionTransformer = {
+    // Since spark 3.3.0
+    val extract =
+      SparkShimLoader.getSparkShims.extractExpressionTimestampAddUnit(original)
+    if (extract.isEmpty) {
+      throw new UnsupportedOperationException(s"Not support expression 
TimestampAdd.")
+    }
+    TimestampAddTransformer(substraitExprName, extract.get.head, left, right, 
original)
+  }
+
   override def genTimestampDiffTransformer(
       substraitExprName: String,
       left: ExpressionTransformer,
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
index 1fa449bbb2..df761ce2df 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
@@ -278,6 +278,20 @@ abstract class DateFunctionsValidateSuite extends 
FunctionsValidateSuite {
     }
   }
 
+  testWithMinSparkVersion("timestampadd", "3.3") {
+    withTempPath {
+      path =>
+        val ts = Timestamp.valueOf("2020-02-29 00:00:00.500")
+        val quantity = 1
+        Seq((ts, quantity)).toDF("ts", 
"quantity").write.parquet(path.getCanonicalPath)
+
+        
spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("time")
+        runQueryAndCompare("select timestampadd(day, quantity, ts) from time") 
{
+          checkGlutenOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
+
   testWithMinSparkVersion("timestampdiff", "3.3") {
     withTempPath {
       path =>
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 8676014427..c6cdcc3f32 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -762,6 +762,12 @@ trait SparkPlanExecApi {
   def deserializeColumnarBatch(input: ObjectInputStream): ColumnarBatch =
     throw new GlutenNotSupportException("Deserialize ColumnarBatch is not 
supported")
 
+  def genTimestampAddTransformer(
+      substraitExprName: String,
+      left: ExpressionTransformer,
+      right: ExpressionTransformer,
+      original: Expression): ExpressionTransformer
+
   def genTimestampDiffTransformer(
       substraitExprName: String,
       left: ExpressionTransformer,
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/DateTimeExpressionsTransformer.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/DateTimeExpressionsTransformer.scala
index b5206b0ac5..2e2611d5ab 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/DateTimeExpressionsTransformer.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/DateTimeExpressionsTransformer.scala
@@ -66,11 +66,10 @@ case class TimestampAddTransformer(
     unit: String,
     left: ExpressionTransformer,
     right: ExpressionTransformer,
-    timeZoneId: String,
     original: Expression)
   extends ExpressionTransformer {
   override def children: Seq[ExpressionTransformer] = {
-    Seq(LiteralTransformer(unit), left, right, LiteralTransformer(timeZoneId))
+    Seq(LiteralTransformer(unit), left, right)
   }
 }
 
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index 6f3d8d6346..7673b1a6f1 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -569,20 +569,12 @@ object ExpressionConverter extends SQLConfHelper with 
Logging {
           substraitExprName,
           m.children.map(replaceWithExpressionTransformer0(_, attributeSeq, 
expressionsMap)),
           m)
-      case timestampAdd if 
timestampAdd.getClass.getSimpleName.equals("TimestampAdd") =>
-        // for spark3.3
-        val extract = 
SparkShimLoader.getSparkShims.extractExpressionTimestampAddUnit(timestampAdd)
-        if (extract.isEmpty) {
-          throw new UnsupportedOperationException(s"Not support expression 
TimestampAdd.")
-        }
-        val add = timestampAdd.asInstanceOf[BinaryExpression]
-        TimestampAddTransformer(
-          substraitExprName,
-          extract.get.head,
-          replaceWithExpressionTransformer0(add.left, attributeSeq, 
expressionsMap),
-          replaceWithExpressionTransformer0(add.right, attributeSeq, 
expressionsMap),
-          extract.get.last,
-          add
+      case tsAdd: BinaryExpression if 
tsAdd.getClass.getSimpleName.equals("TimestampAdd") =>
+        
BackendsApiManager.getSparkPlanExecApiInstance.genTimestampAddTransformer(
+          substraitExprName,
+          replaceWithExpressionTransformer0(tsAdd.left, attributeSeq, 
expressionsMap),
+          replaceWithExpressionTransformer0(tsAdd.right, attributeSeq, 
expressionsMap),
+          tsAdd
         )
       case tsDiff: BinaryExpression if 
tsDiff.getClass.getSimpleName.equals("TimestampDiff") =>
         
BackendsApiManager.getSparkPlanExecApiInstance.genTimestampDiffTransformer(
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 0bcc9e5111..36beac1eb4 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -631,6 +631,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-37552: convert a timestamp_ntz to another time zone")
     .exclude("SPARK-38195: add a quantity of interval units to a timestamp")
     .exclude("SPARK-38284: difference between two timestamps in units")
+    .exclude("SPARK-42635: timestampadd near daylight saving transition")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
     .excludeGlutenTest("Hour")
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index e13bccc72b..87262a36d9 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -137,6 +137,9 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_timestamp exception mode")
     // Replaced by a gluten test to pass timezone through config.
     .exclude("from_unixtime")
+    // Vanilla Spark does not have a unified DST Timestamp fastTime. 
1320570000000L and
+    // 1320566400000L both represent 2011-11-06 01:00:00.
+    .exclude("SPARK-42635: timestampadd near daylight saving transition")
     // 
https://github.com/facebookincubator/velox/pull/10563/files#diff-140dc50e6dac735f72d29014da44b045509df0dd1737f458de1fe8cfd33d8145
     .excludeGlutenTest("from_unixtime")
   enableSuite[GlutenDecimalExpressionSuite]
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
index a76ba83948..794db27c12 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.GlutenTestsTrait
 import org.apache.spark.sql.catalyst.InternalRow
 import 
org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
+import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, 
TimeZoneUTC}
@@ -474,4 +475,107 @@ class GlutenDateExpressionsSuite extends 
DateExpressionsSuite with GlutenTestsTr
       }
     }
   }
+
+  testGluten("SPARK-42635: timestampadd near daylight saving transition") {
+    // In America/Los_Angeles timezone, timestamp value `skippedTime` is 
2011-03-13 03:00:00.
+    // The next second of 2011-03-13 01:59:59 jumps to 2011-03-13 03:00:00.
+    val skippedTime = 1300010400000000L
+    // In America/Los_Angeles timezone, both timestamp range `[repeatedTime - 
MICROS_PER_HOUR,
+    // repeatedTime)` and `[repeatedTime, repeatedTime + MICROS_PER_HOUR)` map 
to
+    // [2011-11-06 01:00:00, 2011-11-06 02:00:00).
+    // The next second of 2011-11-06 01:59:59 (pre-transition) jumps back to 
2011-11-06 01:00:00.
+    val repeatedTime = 1320570000000000L
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> LA.getId) {
+      // Adding one day is **not** equivalent to adding <unit>_PER_DAY time 
units, because not every
+      // day has 24 hours: 2011-03-13 has 23 hours, 2011-11-06 has 25 hours.
+
+      // timestampadd(DAY, 1, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
+      checkEvaluation(
+        TimestampAdd("DAY", Literal(1), Literal(skippedTime - 23 * 
MICROS_PER_HOUR, TimestampType)),
+        skippedTime)
+      // timestampadd(HOUR, 24, 2011-03-12 03:00:00) = 2011-03-13 04:00:00
+      checkEvaluation(
+        TimestampAdd(
+          "HOUR",
+          Literal(24),
+          Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+        skippedTime + MICROS_PER_HOUR)
+      // timestampadd(HOUR, 23, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
+      checkEvaluation(
+        TimestampAdd(
+          "HOUR",
+          Literal(23),
+          Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+        skippedTime)
+      // timestampadd(SECOND, SECONDS_PER_DAY, 2011-03-12 03:00:00) = 
2011-03-13 04:00:00
+      checkEvaluation(
+        TimestampAdd(
+          "SECOND",
+          Literal(SECONDS_PER_DAY.toInt),
+          Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+        skippedTime + MICROS_PER_HOUR)
+      // timestampadd(SECOND, SECONDS_PER_DAY - 1, 2011-03-12 03:00:00) = 
2011-03-13 03:59:59
+      checkEvaluation(
+        TimestampAdd(
+          "SECOND",
+          Literal(SECONDS_PER_DAY.toInt - 1),
+          Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+        skippedTime + MICROS_PER_HOUR - MICROS_PER_SECOND
+      )
+
+      // timestampadd(DAY, 1, 2011-11-05 02:00:00) = 2011-11-06 02:00:00
+      checkEvaluation(
+        TimestampAdd(
+          "DAY",
+          Literal(1),
+          Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+        repeatedTime + MICROS_PER_HOUR)
+      // timestampadd(DAY, 1, 2011-11-05 01:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+      checkEvaluation(
+        TimestampAdd(
+          "DAY",
+          Literal(1),
+          Literal(repeatedTime - 25 * MICROS_PER_HOUR, TimestampType)),
+        repeatedTime - MICROS_PER_HOUR)
+      // timestampadd(DAY, -1, 2011-11-07 01:00:00) = 2011-11-06 01:00:00 
(post-transition).
+      // Vanilla spark result is 1320570000000000L, velox result is 
1320566400000000L, they
+      // are all 2011-11-06 01:00:00.
+      checkEvaluation(
+        TimestampAdd(
+          "DAY",
+          Literal(-1),
+          Literal(repeatedTime + 24 * MICROS_PER_HOUR, TimestampType)),
+        repeatedTime - MICROS_PER_HOUR)
+      // timestampadd(MONTH, 1, 2011-10-06 01:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+      checkEvaluation(
+        TimestampAdd(
+          "MONTH",
+          Literal(1),
+          Literal(repeatedTime - MICROS_PER_HOUR - 31 * MICROS_PER_DAY, 
TimestampType)),
+        repeatedTime - MICROS_PER_HOUR)
+      // timestampadd(MONTH, -1, 2011-12-06 01:00:00) = 2011-11-06 01:00:00 
(post-transition)
+      // Vanilla spark result is 1320570000000000L, velox result is 
1320566400000000L, they
+      // are all 2011-11-06 01:00:00.
+      checkEvaluation(
+        TimestampAdd(
+          "MONTH",
+          Literal(-1),
+          Literal(repeatedTime + 30 * MICROS_PER_DAY, TimestampType)),
+        repeatedTime - MICROS_PER_HOUR)
+      // timestampadd(HOUR, 23, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+      checkEvaluation(
+        TimestampAdd(
+          "HOUR",
+          Literal(23),
+          Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+        repeatedTime - MICROS_PER_HOUR)
+      // timestampadd(HOUR, 24, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 
(post-transition)
+      checkEvaluation(
+        TimestampAdd(
+          "HOUR",
+          Literal(24),
+          Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+        repeatedTime)
+    }
+  }
 }
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 6a0b47f2d7..88eed1dbaf 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -681,6 +681,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("to_timestamp exception mode")
     // Replaced by a gluten test to pass timezone through config.
     .exclude("from_unixtime")
+    .exclude("SPARK-42635: timestampadd near daylight saving transition")
     // 
https://github.com/facebookincubator/velox/pull/10563/files#diff-140dc50e6dac735f72d29014da44b045509df0dd1737f458de1fe8cfd33d8145
     .excludeGlutenTest("from_unixtime")
     .excludeCH("DayOfYear")
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 74fe04b9a6..9462248fac 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -137,6 +137,9 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_timestamp exception mode")
     // Replaced by a gluten test to pass timezone through config.
     .exclude("from_unixtime")
+    // Vanilla Spark does not have a unified DST Timestamp fastTime. 
1320570000000L and
+    // 1320566400000L both represent 2011-11-06 01:00:00.
+    .exclude("SPARK-42635: timestampadd near daylight saving transition")
     // 
https://github.com/facebookincubator/velox/pull/10563/files#diff-140dc50e6dac735f72d29014da44b045509df0dd1737f458de1fe8cfd33d8145
     .excludeGlutenTest("from_unixtime")
   enableSuite[GlutenDecimalExpressionSuite]
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
index a76ba83948..d53c9187d3 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.GlutenTestsTrait
 import org.apache.spark.sql.catalyst.InternalRow
 import 
org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
+import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, 
TimeZoneUTC}
@@ -474,4 +475,107 @@ class GlutenDateExpressionsSuite extends 
DateExpressionsSuite with GlutenTestsTr
       }
     }
   }
+
+  testGluten("SPARK-42635: timestampadd near daylight saving transition") {
+    // In America/Los_Angeles timezone, timestamp value `skippedTime` is 
2011-03-13 03:00:00.
+    // The next second of 2011-03-13 01:59:59 jumps to 2011-03-13 03:00:00.
+    val skippedTime = 1300010400000000L
+    // In America/Los_Angeles timezone, both timestamp range `[repeatedTime - 
MICROS_PER_HOUR,
+    // repeatedTime)` and `[repeatedTime, repeatedTime + MICROS_PER_HOUR)` map 
to
+    // [2011-11-06 01:00:00, 2011-11-06 02:00:00).
+    // The next second of 2011-11-06 01:59:59 (pre-transition) jumps back to 
2011-11-06 01:00:00.
+    val repeatedTime = 1320570000000000L
+    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> LA.getId) {
+      // Adding one day is **not** equivalent to adding <unit>_PER_DAY time 
units, because not every
+      // day has 24 hours: 2011-03-13 has 23 hours, 2011-11-06 has 25 hours.
+
+      // timestampadd(DAY, 1, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
+      checkEvaluation(
+        TimestampAdd("DAY", Literal(1), Literal(skippedTime - 23 * 
MICROS_PER_HOUR, TimestampType)),
+        skippedTime)
+      // timestampadd(HOUR, 24, 2011-03-12 03:00:00) = 2011-03-13 04:00:00
+      checkEvaluation(
+        TimestampAdd(
+          "HOUR",
+          Literal(24),
+          Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+        skippedTime + MICROS_PER_HOUR)
+      // timestampadd(HOUR, 23, 2011-03-12 03:00:00) = 2011-03-13 03:00:00
+      checkEvaluation(
+        TimestampAdd(
+          "HOUR",
+          Literal(23),
+          Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+        skippedTime)
+      // timestampadd(SECOND, SECONDS_PER_DAY, 2011-03-12 03:00:00) = 
2011-03-13 04:00:00
+      checkEvaluation(
+        TimestampAdd(
+          "SECOND",
+          Literal(SECONDS_PER_DAY.toInt),
+          Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+        skippedTime + MICROS_PER_HOUR)
+      // timestampadd(SECOND, SECONDS_PER_DAY - 1, 2011-03-12 03:00:00) = 
2011-03-13 03:59:59
+      checkEvaluation(
+        TimestampAdd(
+          "SECOND",
+          Literal(SECONDS_PER_DAY.toInt - 1),
+          Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)),
+        skippedTime + MICROS_PER_HOUR - MICROS_PER_SECOND
+      )
+
+      // timestampadd(DAY, 1, 2011-11-05 02:00:00) = 2011-11-06 02:00:00
+      checkEvaluation(
+        TimestampAdd(
+          "DAY",
+          Literal(1),
+          Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+        repeatedTime + MICROS_PER_HOUR)
+      // timestampadd(DAY, 1, 2011-11-05 01:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+      checkEvaluation(
+        TimestampAdd(
+          "DAY",
+          Literal(1),
+          Literal(repeatedTime - 25 * MICROS_PER_HOUR, TimestampType)),
+        repeatedTime - MICROS_PER_HOUR)
+      // timestampadd(DAY, -1, 2011-11-07 01:00:00) = 2011-11-06 01:00:00 
(post-transition)
+      // Vanilla spark result is 1320570000000000L, velox result is 
1320566400000000L, they
+      // are all 2011-11-06 01:00:00.
+      checkEvaluation(
+        TimestampAdd(
+          "DAY",
+          Literal(-1),
+          Literal(repeatedTime + 24 * MICROS_PER_HOUR, TimestampType)),
+        repeatedTime - MICROS_PER_HOUR)
+      // timestampadd(MONTH, 1, 2011-10-06 01:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+      checkEvaluation(
+        TimestampAdd(
+          "MONTH",
+          Literal(1),
+          Literal(repeatedTime - MICROS_PER_HOUR - 31 * MICROS_PER_DAY, 
TimestampType)),
+        repeatedTime - MICROS_PER_HOUR)
+      // timestampadd(MONTH, -1, 2011-12-06 01:00:00) = 2011-11-06 01:00:00 
(post-transition)
+      // Vanilla spark result is 1320570000000000L, velox result is 
1320566400000000L, they
+      // are all 2011-11-06 01:00:00.
+      checkEvaluation(
+        TimestampAdd(
+          "MONTH",
+          Literal(-1),
+          Literal(repeatedTime + 30 * MICROS_PER_DAY, TimestampType)),
+        repeatedTime - MICROS_PER_HOUR)
+      // timestampadd(HOUR, 23, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 
(pre-transition)
+      checkEvaluation(
+        TimestampAdd(
+          "HOUR",
+          Literal(23),
+          Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+        repeatedTime - MICROS_PER_HOUR)
+      // timestampadd(HOUR, 24, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 
(post-transition)
+      checkEvaluation(
+        TimestampAdd(
+          "HOUR",
+          Literal(24),
+          Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)),
+        repeatedTime)
+    }
+  }
 }
diff --git 
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
 
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
index 08253d088c..16affb2ad8 100644
--- 
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
+++ 
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
@@ -627,6 +627,14 @@ class Spark34Shims extends SparkShims {
 
   override def unBase64FunctionFailsOnError(unBase64: UnBase64): Boolean = 
unBase64.failOnError
 
+  override def extractExpressionTimestampAddUnit(exp: Expression): 
Option[Seq[String]] = {
+    exp match {
+      case timestampAdd: TimestampAdd =>
+        Option.apply(Seq(timestampAdd.unit, 
timestampAdd.timeZoneId.getOrElse("")))
+      case _ => Option.empty
+    }
+  }
+
   override def extractExpressionTimestampDiffUnit(exp: Expression): 
Option[String] = {
     exp match {
       case timestampDiff: TimestampDiff =>
diff --git 
a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
 
b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
index 643aed59b1..e48a401722 100644
--- 
a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
+++ 
b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
@@ -687,6 +687,14 @@ class Spark35Shims extends SparkShims {
 
   override def unBase64FunctionFailsOnError(unBase64: UnBase64): Boolean = 
unBase64.failOnError
 
+  override def extractExpressionTimestampAddUnit(exp: Expression): 
Option[Seq[String]] = {
+    exp match {
+      case timestampAdd: TimestampAdd =>
+        Option.apply(Seq(timestampAdd.unit, 
timestampAdd.timeZoneId.getOrElse("")))
+      case _ => Option.empty
+    }
+  }
+
   override def extractExpressionTimestampDiffUnit(exp: Expression): 
Option[String] = {
     exp match {
       case timestampDiff: TimestampDiff =>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to