This is an automated email from the ASF dual-hosted git repository.
mingliang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new fac063117c [GLUTEN-9779][VL] Remove redundant GetTimestamp (#9782)
fac063117c is described below
commit fac063117c7072c6528d4841f6b0b9275303d836
Author: Mingliang Zhu <[email protected]>
AuthorDate: Thu May 29 16:50:30 2025 +0800
[GLUTEN-9779][VL] Remove redundant GetTimestamp (#9782)
---
.../gluten/backendsapi/velox/VeloxRuleApi.scala | 2 ++
.../functions/DateFunctionsValidateSuite.scala | 30 ++++++++++++++++++
.../columnar/EliminateRedundantGetTimestamp.scala | 36 ++++++++++++++++++++++
3 files changed, 68 insertions(+)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala
index f7246605b8..f53e28bd93 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala
@@ -71,6 +71,7 @@ object VeloxRuleApi {
injector.injectPreTransform(_ => RewriteSubqueryBroadcast())
injector.injectPreTransform(c =>
BloomFilterMightContainJointRewriteRule.apply(c.session))
injector.injectPreTransform(c => ArrowScanReplaceRule.apply(c.session))
+ injector.injectPreTransform(_ => EliminateRedundantGetTimestamp)
// Legacy: The legacy transform rule.
val offloads = Seq(OffloadOthers(), OffloadExchange(),
OffloadJoin()).map(_.toStrcitRule())
@@ -133,6 +134,7 @@ object VeloxRuleApi {
injector.injectPreTransform(_ => RewriteSubqueryBroadcast())
injector.injectPreTransform(c =>
BloomFilterMightContainJointRewriteRule.apply(c.session))
injector.injectPreTransform(c => ArrowScanReplaceRule.apply(c.session))
+ injector.injectPreTransform(_ => EliminateRedundantGetTimestamp)
// Gluten RAS: The RAS rule.
val validatorBuilder: GlutenConfig => Validator = conf =>
Validators.newValidator(conf)
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
index 90558d2f60..b82f62187b 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
@@ -277,6 +277,36 @@ abstract class DateFunctionsValidateSuite extends
FunctionsValidateSuite {
}
}
+  test("to_date") {
+    withTempPath {
+      dir =>
+        // Write three sample timestamps to parquet, then register them as the "time" view.
+        Seq("2015-07-22 10:00:00.012", "2014-12-31 23:59:59.012", "2014-12-31 23:59:59.001")
+          .map(Timestamp.valueOf(_))
+          .toDF("t")
+          .write
+          .parquet(dir.getCanonicalPath)
+        spark.read.parquet(dir.getCanonicalPath).createOrReplaceTempView("time")
+
+        // to_date with an explicit format on a timestamp column; the check expects a
+        // ProjectExecTransformer in the executed plan.
+        runQueryAndCompare("select to_date(t, 'yyyy-MM') from time") {
+          checkGlutenOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
+
+  test("to_timestamp") {
+    withTempPath {
+      dir =>
+        // Sample data: three timestamps stored as parquet and exposed as the "time" view.
+        val samples = Seq(
+          "2015-07-22 10:00:00.012",
+          "2014-12-31 23:59:59.012",
+          "2014-12-31 23:59:59.001").map(Timestamp.valueOf(_))
+        samples.toDF("t").write.parquet(dir.getCanonicalPath)
+        spark.read.parquet(dir.getCanonicalPath).createOrReplaceTempView("time")
+
+        // to_timestamp with an explicit format on a timestamp column; the check expects a
+        // ProjectExecTransformer in the executed plan.
+        runQueryAndCompare("select to_timestamp(t, 'yyyy-MM') from time") {
+          checkGlutenOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
+
test("Test to_utc_timestamp function") {
withTempPath {
path =>
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/EliminateRedundantGetTimestamp.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/EliminateRedundantGetTimestamp.scala
new file mode 100644
index 0000000000..ff90b4744d
--- /dev/null
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/EliminateRedundantGetTimestamp.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.extension.columnar
+
+import org.apache.spark.sql.catalyst.expressions.GetTimestamp
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.execution.SparkPlan
+
+/**
+ * When there is a format parameter, to_date and to_timestamp will be replaced by GetTimestamp. If
+ * the data type of GetTimestamp is the same as that of its left child, then GetTimestamp is
+ * redundant. Velox does not support GetTimestamp(Timestamp, format), so in this case the
+ * expression is replaced by its left child.
+ */
+object EliminateRedundantGetTimestamp extends Rule[SparkPlan] {
+
+  /**
+   * Replaces every GetTimestamp whose result type equals the type of its left child with that
+   * child.
+   *
+   * @param plan
+   *   the physical plan to rewrite
+   * @return
+   *   the plan with redundant GetTimestamp expressions removed from all of its operators
+   */
+  override def apply(plan: SparkPlan): SparkPlan = {
+    // transformExpressions would only rewrite the expressions of the root operator. Use
+    // transformAllExpressions so that operators below the root are rewritten as well.
+    plan.transformAllExpressions {
+      case getTimestamp: GetTimestamp if getTimestamp.left.dataType == getTimestamp.dataType =>
+        getTimestamp.left
+    }
+  }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]