This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new da6d169ce [VL] Enable to_utc_timestamp Spark function (#5139)
da6d169ce is described below

commit da6d169ced698807098da1f17758ed45fa5f6e79
Author: Ankita Victor <[email protected]>
AuthorDate: Tue Apr 2 07:35:58 2024 +0530

    [VL] Enable to_utc_timestamp Spark function (#5139)
---
 .../org/apache/gluten/utils/CHExpressionUtil.scala |  3 +-
 .../execution/ScalarFunctionsValidateSuite.scala   | 23 +++++++++++++
 .../gluten/expression/ExpressionMappings.scala     |  1 +
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  2 ++
 .../gluten/utils/velox/VeloxTestSettings.scala     |  4 +++
 .../spark/sql/GlutenDateFunctionsSuite.scala       | 38 ++++++++++++++++++++++
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  2 ++
 .../gluten/utils/velox/VeloxTestSettings.scala     |  4 +++
 .../spark/sql/GlutenDateFunctionsSuite.scala       | 38 ++++++++++++++++++++++
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  2 ++
 .../gluten/utils/velox/VeloxTestSettings.scala     |  4 +++
 .../spark/sql/GlutenDateFunctionsSuite.scala       | 38 ++++++++++++++++++++++
 .../apache/gluten/expression/ExpressionNames.scala |  1 +
 13 files changed, 159 insertions(+), 1 deletion(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index b382642fe..70aa18980 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -183,6 +183,7 @@ object CHExpressionUtil {
     BIT_LENGTH -> DefaultValidator(),
     MAKE_YM_INTERVAL -> DefaultValidator(),
     KURTOSIS -> DefaultValidator(),
-    REGR_R2 -> DefaultValidator()
+    REGR_R2 -> DefaultValidator(),
+    TO_UTC_TIMESTAMP -> DefaultValidator()
   )
 }
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 038571547..527621c44 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -18,6 +18,8 @@ package org.apache.gluten.execution
 
 import org.apache.spark.sql.types._
 
+import java.sql.Timestamp
+
 class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
   disableFallbackCheck
   import testImplicits._
@@ -325,6 +327,27 @@ class ScalarFunctionsValidateSuite extends 
FunctionsValidateTest {
     }
   }
 
+  test("to_utc_timestamp") {
+    withTempPath {
+      path =>
+        Seq(
+          (Timestamp.valueOf("2015-07-24 00:00:00"), "America/Los_Angeles"),
+          (Timestamp.valueOf("2015-07-25 00:00:00"), "America/Los_Angeles")
+        ).toDF("a", "b")
+          .write
+          .parquet(path.getCanonicalPath)
+
+        
spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("view")
+
+        runQueryAndCompare("SELECT to_utc_timestamp(a, 
\"America/Los_Angeles\") from view") {
+          checkOperatorMatch[ProjectExecTransformer]
+        }
+        runQueryAndCompare("SELECT to_utc_timestamp(a, b) from view") {
+          checkOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
+
   test("array_aggregate") {
     withTempPath {
       path =>
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index 46803a93e..1bb6c5d16 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -181,6 +181,7 @@ object ExpressionMappings {
     Sig[DateFromUnixDate](DATE_FROM_UNIX_DATE),
     Sig[MakeTimestamp](MAKE_TIMESTAMP),
     Sig[MakeYMInterval](MAKE_YM_INTERVAL),
+    Sig[ToUTCTimestamp](TO_UTC_TIMESTAMP),
     // JSON functions
     Sig[GetJsonObject](GET_JSON_OBJECT),
     Sig[LengthOfJsonArray](JSON_ARRAY_LENGTH),
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index fbe485849..bfecd2292 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -295,6 +295,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and 
minutes")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
+    .exclude("to_utc_timestamp with literal zone")
+    .exclude("to_utc_timestamp with column zone")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
     "SPARK-32659: Fix the data issue when pruning DPP on non-atomic type")
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index ec0dee2af..924554aea 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -261,6 +261,10 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_timestamp")
     // Legacy mode is not supported, assuming this mode is not commonly used.
     .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+    // Replaced by another test.
+    .exclude("to_utc_timestamp with literal zone")
+    // Replaced by another test.
+    .exclude("to_utc_timestamp with column zone")
   enableSuite[GlutenDataFrameFunctionsSuite]
     // blocked by Velox-5768
     .exclude("aggregate function - array for primitive type containing null")
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index 8d1f7320d..b1d1cd43c 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -248,4 +248,42 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite 
with GlutenSQLTestsTra
         }
     }
   }
+
+  testGluten("to_utc_timestamp with literal zone") {
+    val df = Seq(
+      (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"),
+      (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00")
+    ).toDF("a", "b")
+    checkAnswer(
+      df.select(to_utc_timestamp(col("a"), "America/Los_Angeles")),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+    )
+    checkAnswer(
+      df.select(to_utc_timestamp(col("b"), "America/Los_Angeles")),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+    )
+  }
+
+  testGluten("to_utc_timestamp with column zone") {
+    val df = Seq(
+      (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", 
"America/Los_Angeles"),
+      (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", 
"Europe/Paris")
+    ).toDF("a", "b", "c")
+    checkAnswer(
+      df.select(to_utc_timestamp(col("a"), col("c"))),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+    )
+    checkAnswer(
+      df.select(to_utc_timestamp(col("b"), col("c"))),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+    )
+  }
 }
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index c54c01717..385ef6381 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -321,6 +321,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and 
minutes")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
+    .exclude("to_utc_timestamp with literal zone")
+    .exclude("to_utc_timestamp with column zone")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
     "SPARK-32659: Fix the data issue when pruning DPP on non-atomic type")
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 7a2e77937..c95ece47f 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1024,6 +1024,10 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_timestamp")
     // Legacy mode is not supported, assuming this mode is not commonly used.
     .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+    // Replaced by another test.
+    .exclude("to_utc_timestamp with literal zone")
+    // Replaced by another test.
+    .exclude("to_utc_timestamp with column zone")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de4..8fdb69229 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,42 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite 
with GlutenSQLTestsTra
         }
     }
   }
+
+  testGluten("to_utc_timestamp with literal zone") {
+    val df = Seq(
+      (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"),
+      (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00")
+    ).toDF("a", "b")
+    checkAnswer(
+      df.select(to_utc_timestamp(col("a"), "America/Los_Angeles")),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+    )
+    checkAnswer(
+      df.select(to_utc_timestamp(col("b"), "America/Los_Angeles")),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+    )
+  }
+
+  testGluten("to_utc_timestamp with column zone") {
+    val df = Seq(
+      (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", 
"America/Los_Angeles"),
+      (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", 
"Europe/Paris")
+    ).toDF("a", "b", "c")
+    checkAnswer(
+      df.select(to_utc_timestamp(col("a"), col("c"))),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+    )
+    checkAnswer(
+      df.select(to_utc_timestamp(col("b"), col("c"))),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+    )
+  }
 }
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 31a747791..370df059c 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -319,6 +319,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and 
minutes")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
+    .exclude("to_utc_timestamp with literal zone")
+    .exclude("to_utc_timestamp with column zone")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
     "SPARK-32659: Fix the data issue when pruning DPP on non-atomic type")
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 15c58edc3..f581bb2da 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1028,6 +1028,10 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_timestamp")
     // Legacy mode is not supported, assuming this mode is not commonly used.
     .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+    // Replaced by another test.
+    .exclude("to_utc_timestamp with literal zone")
+    // Replaced by another test.
+    .exclude("to_utc_timestamp with column zone")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de4..8fdb69229 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,42 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite 
with GlutenSQLTestsTra
         }
     }
   }
+
+  testGluten("to_utc_timestamp with literal zone") {
+    val df = Seq(
+      (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"),
+      (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00")
+    ).toDF("a", "b")
+    checkAnswer(
+      df.select(to_utc_timestamp(col("a"), "America/Los_Angeles")),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+    )
+    checkAnswer(
+      df.select(to_utc_timestamp(col("b"), "America/Los_Angeles")),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+    )
+  }
+
+  testGluten("to_utc_timestamp with column zone") {
+    val df = Seq(
+      (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", 
"America/Los_Angeles"),
+      (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", 
"Europe/Paris")
+    ).toDF("a", "b", "c")
+    checkAnswer(
+      df.select(to_utc_timestamp(col("a"), col("c"))),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+    )
+    checkAnswer(
+      df.select(to_utc_timestamp(col("b"), col("c"))),
+      Seq(
+        Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+        Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+    )
+  }
 }
diff --git 
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
 
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index 58dd9ac12..96febfde7 100644
--- 
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ 
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -198,6 +198,7 @@ object ExpressionNames {
   final val DATE_FROM_UNIX_DATE = "date_from_unix_date"
   final val MAKE_TIMESTAMP = "make_timestamp"
   final val MAKE_YM_INTERVAL = "make_ym_interval"
+  final val TO_UTC_TIMESTAMP = "to_utc_timestamp"
 
   // JSON functions
   final val GET_JSON_OBJECT = "get_json_object"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to