This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new da6d169ce [VL] Enable to_utc_timestamp Spark function (#5139)
da6d169ce is described below
commit da6d169ced698807098da1f17758ed45fa5f6e79
Author: Ankita Victor <[email protected]>
AuthorDate: Tue Apr 2 07:35:58 2024 +0530
[VL] Enable to_utc_timestamp Spark function (#5139)
---
.../org/apache/gluten/utils/CHExpressionUtil.scala | 3 +-
.../execution/ScalarFunctionsValidateSuite.scala | 23 +++++++++++++
.../gluten/expression/ExpressionMappings.scala | 1 +
.../utils/clickhouse/ClickHouseTestSettings.scala | 2 ++
.../gluten/utils/velox/VeloxTestSettings.scala | 4 +++
.../spark/sql/GlutenDateFunctionsSuite.scala | 38 ++++++++++++++++++++++
.../utils/clickhouse/ClickHouseTestSettings.scala | 2 ++
.../gluten/utils/velox/VeloxTestSettings.scala | 4 +++
.../spark/sql/GlutenDateFunctionsSuite.scala | 38 ++++++++++++++++++++++
.../utils/clickhouse/ClickHouseTestSettings.scala | 2 ++
.../gluten/utils/velox/VeloxTestSettings.scala | 4 +++
.../spark/sql/GlutenDateFunctionsSuite.scala | 38 ++++++++++++++++++++++
.../apache/gluten/expression/ExpressionNames.scala | 1 +
13 files changed, 159 insertions(+), 1 deletion(-)
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index b382642fe..70aa18980 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -183,6 +183,7 @@ object CHExpressionUtil {
BIT_LENGTH -> DefaultValidator(),
MAKE_YM_INTERVAL -> DefaultValidator(),
KURTOSIS -> DefaultValidator(),
- REGR_R2 -> DefaultValidator()
+ REGR_R2 -> DefaultValidator(),
+ TO_UTC_TIMESTAMP -> DefaultValidator()
)
}
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 038571547..527621c44 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -18,6 +18,8 @@ package org.apache.gluten.execution
import org.apache.spark.sql.types._
+import java.sql.Timestamp
+
class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
disableFallbackCheck
import testImplicits._
@@ -325,6 +327,27 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
}
}
+ test("to_utc_timestamp") {
+ withTempPath {
+ path =>
+ Seq(
+ (Timestamp.valueOf("2015-07-24 00:00:00"), "America/Los_Angeles"),
+ (Timestamp.valueOf("2015-07-25 00:00:00"), "America/Los_Angeles")
+ ).toDF("a", "b")
+ .write
+ .parquet(path.getCanonicalPath)
+
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("view")
+
+ runQueryAndCompare("SELECT to_utc_timestamp(a, \"America/Los_Angeles\") from view") {
+ checkOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("SELECT to_utc_timestamp(a, b) from view") {
+ checkOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
+
test("array_aggregate") {
withTempPath {
path =>
diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index 46803a93e..1bb6c5d16 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -181,6 +181,7 @@ object ExpressionMappings {
Sig[DateFromUnixDate](DATE_FROM_UNIX_DATE),
Sig[MakeTimestamp](MAKE_TIMESTAMP),
Sig[MakeYMInterval](MAKE_YM_INTERVAL),
+ Sig[ToUTCTimestamp](TO_UTC_TIMESTAMP),
// JSON functions
Sig[GetJsonObject](GET_JSON_OBJECT),
Sig[LengthOfJsonArray](JSON_ARRAY_LENGTH),
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index fbe485849..bfecd2292 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -295,6 +295,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
+ .exclude("to_utc_timestamp with literal zone")
+ .exclude("to_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
"SPARK-32659: Fix the data issue when pruning DPP on non-atomic type")
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index ec0dee2af..924554aea 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -261,6 +261,10 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Replaced by another test.
+ .exclude("to_utc_timestamp with literal zone")
+ // Replaced by another test.
+ .exclude("to_utc_timestamp with column zone")
enableSuite[GlutenDataFrameFunctionsSuite]
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index 8d1f7320d..b1d1cd43c 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -248,4 +248,42 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("to_utc_timestamp with literal zone") {
+ val df = Seq(
+ (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"),
+ (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00")
+ ).toDF("a", "b")
+ checkAnswer(
+ df.select(to_utc_timestamp(col("a"), "America/Los_Angeles")),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+ )
+ checkAnswer(
+ df.select(to_utc_timestamp(col("b"), "America/Los_Angeles")),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+ )
+ }
+
+ testGluten("to_utc_timestamp with column zone") {
+ val df = Seq(
+ (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", "America/Los_Angeles"),
+ (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "Europe/Paris")
+ ).toDF("a", "b", "c")
+ checkAnswer(
+ df.select(to_utc_timestamp(col("a"), col("c"))),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+ )
+ checkAnswer(
+ df.select(to_utc_timestamp(col("b"), col("c"))),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+ )
+ }
}
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index c54c01717..385ef6381 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -321,6 +321,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
+ .exclude("to_utc_timestamp with literal zone")
+ .exclude("to_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
"SPARK-32659: Fix the data issue when pruning DPP on non-atomic type")
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 7a2e77937..c95ece47f 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1024,6 +1024,10 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Replaced by another test.
+ .exclude("to_utc_timestamp with literal zone")
+ // Replaced by another test.
+ .exclude("to_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de4..8fdb69229 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,42 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("to_utc_timestamp with literal zone") {
+ val df = Seq(
+ (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"),
+ (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00")
+ ).toDF("a", "b")
+ checkAnswer(
+ df.select(to_utc_timestamp(col("a"), "America/Los_Angeles")),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+ )
+ checkAnswer(
+ df.select(to_utc_timestamp(col("b"), "America/Los_Angeles")),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+ )
+ }
+
+ testGluten("to_utc_timestamp with column zone") {
+ val df = Seq(
+ (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", "America/Los_Angeles"),
+ (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "Europe/Paris")
+ ).toDF("a", "b", "c")
+ checkAnswer(
+ df.select(to_utc_timestamp(col("a"), col("c"))),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+ )
+ checkAnswer(
+ df.select(to_utc_timestamp(col("b"), col("c"))),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+ )
+ }
}
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 31a747791..370df059c 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -319,6 +319,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
+ .exclude("to_utc_timestamp with literal zone")
+ .exclude("to_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
"SPARK-32659: Fix the data issue when pruning DPP on non-atomic type")
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 15c58edc3..f581bb2da 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1028,6 +1028,10 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Replaced by another test.
+ .exclude("to_utc_timestamp with literal zone")
+ // Replaced by another test.
+ .exclude("to_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de4..8fdb69229 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,42 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("to_utc_timestamp with literal zone") {
+ val df = Seq(
+ (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"),
+ (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00")
+ ).toDF("a", "b")
+ checkAnswer(
+ df.select(to_utc_timestamp(col("a"), "America/Los_Angeles")),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+ )
+ checkAnswer(
+ df.select(to_utc_timestamp(col("b"), "America/Los_Angeles")),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-25 07:00:00")))
+ )
+ }
+
+ testGluten("to_utc_timestamp with column zone") {
+ val df = Seq(
+ (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", "America/Los_Angeles"),
+ (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "Europe/Paris")
+ ).toDF("a", "b", "c")
+ checkAnswer(
+ df.select(to_utc_timestamp(col("a"), col("c"))),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+ )
+ checkAnswer(
+ df.select(to_utc_timestamp(col("b"), col("c"))),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-24 22:00:00")))
+ )
+ }
}
diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index 58dd9ac12..96febfde7 100644
--- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -198,6 +198,7 @@ object ExpressionNames {
final val DATE_FROM_UNIX_DATE = "date_from_unix_date"
final val MAKE_TIMESTAMP = "make_timestamp"
final val MAKE_YM_INTERVAL = "make_ym_interval"
+ final val TO_UTC_TIMESTAMP = "to_utc_timestamp"
// JSON functions
final val GET_JSON_OBJECT = "get_json_object"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]