This is an automated email from the ASF dual-hosted git repository.
zhli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new dab708026 [VL] Support PreciseTimestampConversion function (#6036)
dab708026 is described below
commit dab708026694c067b104ab901aab78bccee9463c
Author: Zhen Li <[email protected]>
AuthorDate: Wed Jun 12 11:11:18 2024 +0800
[VL] Support PreciseTimestampConversion function (#6036)
[VL] Support PreciseTimestampConversion function.
---
.../backendsapi/velox/VeloxSparkPlanExecApi.scala | 20 ++++++++++++++++++++
.../execution/ScalarFunctionsValidateSuite.scala | 21 +++++++++++++++++++++
.../gluten/backendsapi/SparkPlanExecApi.scala | 7 +++++++
.../gluten/expression/ExpressionConverter.scala | 6 ++++++
.../gluten/expression/ExpressionMappings.scala | 1 +
.../apache/gluten/expression/ExpressionNames.scala | 1 +
6 files changed, 56 insertions(+)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index 66ca8660a..26b4c5082 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -280,6 +280,26 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
GenericExpressionTransformer(substraitExprName, Seq(endDate, startDate),
original)
}
+ override def genPreciseTimestampConversionTransformer(
+ substraitExprName: String,
+ children: Seq[ExpressionTransformer],
+ expr: PreciseTimestampConversion): ExpressionTransformer = {
+ // Expression used internally to convert the TimestampType to Long and back without losing
+ // precision, i.e. in microseconds.
+ val (newSubstraitName, newExpr) = expr match {
+ case _ @PreciseTimestampConversion(_, TimestampType, LongType) =>
+ (ExpressionMappings.expressionsMap(classOf[UnixMicros]),
UnixMicros(expr.child))
+ case _ @PreciseTimestampConversion(_, LongType, TimestampType) =>
+ (
+ ExpressionMappings.expressionsMap(classOf[MicrosToTimestamp]),
+ MicrosToTimestamp(expr.child))
+ case _ =>
+ // TimestampNTZType is not supported here.
+ throw new GlutenNotSupportException("PreciseTimestampConversion is not
supported")
+ }
+ GenericExpressionTransformer(newSubstraitName, children, newExpr)
+ }
+
/**
* Generate FilterExecTransformer.
*
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 6df3a0623..a23fdf243 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -1077,4 +1077,25 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
}
}
}
+
+ test("PreciseTimestampConversion") {
+ withTempPath {
+ path =>
+ val df = spark
+ .sql(
+ "select * from VALUES ('A1', TIMESTAMP'2021-01-01 00:00:00'), " +
+ "('A1', TIMESTAMP'2021-01-01 00:04:30'), ('A1',
TIMESTAMP'2021-01-01 00:06:00'), " +
+ "('A2', TIMESTAMP'2021-01-01 00:01:00') AS tab(a, b)")
+ .write
+ .parquet(path.getCanonicalPath)
+
+
spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("string_timestamp")
+
+ runQueryAndCompare(
+ "SELECT a, window.start, window.end, count(*) as cnt FROM" +
+ " string_timestamp GROUP by a, window(b, '5 minutes') ORDER BY a,
start;") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
}
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 2b4255db4..9a37c4a40 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -306,6 +306,13 @@ trait SparkPlanExecApi {
GenericExpressionTransformer(substraitExprName, children, expr)
}
+ def genPreciseTimestampConversionTransformer(
+ substraitExprName: String,
+ children: Seq[ExpressionTransformer],
+ expr: PreciseTimestampConversion): ExpressionTransformer = {
+ throw new GlutenNotSupportException("PreciseTimestampConversion is not
supported")
+ }
+
/**
* Generate ShuffleDependency for ColumnarShuffleExchangeExec.
*
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index 5d0af9e52..464bbbfd0 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -650,6 +650,12 @@ object ExpressionConverter extends SQLConfHelper with Logging {
replaceWithExpressionTransformerInternal(s.child, attributeSeq,
expressionsMap),
LiteralTransformer(Literal(s.randomSeed.get))),
s)
+ case c: PreciseTimestampConversion =>
+
BackendsApiManager.getSparkPlanExecApiInstance.genPreciseTimestampConversionTransformer(
+ substraitExprName,
+ Seq(replaceWithExpressionTransformerInternal(c.child, attributeSeq,
expressionsMap)),
+ c
+ )
case expr =>
GenericExpressionTransformer(
substraitExprName,
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index 1eade3da6..230d91005 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -196,6 +196,7 @@ object ExpressionMappings {
Sig[UnixMicros](UNIX_MICROS),
Sig[MillisToTimestamp](TIMESTAMP_MILLIS),
Sig[MicrosToTimestamp](TIMESTAMP_MICROS),
+ Sig[PreciseTimestampConversion](PRECYSE_TIMESTAMP_CONVERSION),
// JSON functions
Sig[GetJsonObject](GET_JSON_OBJECT),
Sig[LengthOfJsonArray](JSON_ARRAY_LENGTH),
diff --git
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index dc98f31a3..f817612a1 100644
---
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -219,6 +219,7 @@ object ExpressionNames {
final val UNIX_MICROS = "unix_micros"
final val TIMESTAMP_MILLIS = "timestamp_millis"
final val TIMESTAMP_MICROS = "timestamp_micros"
+ final val PRECYSE_TIMESTAMP_CONVERSION = "precise_timestamp_conversion"
// JSON functions
final val GET_JSON_OBJECT = "get_json_object"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]