This is an automated email from the ASF dual-hosted git repository.

zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 95f392197 [GLUTEN-6425][CH] Support day time interval (#6456)
95f392197 is described below

commit 95f3921977995a0820c963ca90d39d958c0ae927
Author: lgbo <[email protected]>
AuthorDate: Wed Jul 17 10:05:41 2024 +0800

    [GLUTEN-6425][CH] Support day time interval (#6456)
    
    [GLUTEN-6425][CH] Support day time interval
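    
    For example, the new test case added in GlutenClickHouseTPCHSaltNullParquetSuite
    exercises this support with queries of the following shape, where a day-time
    interval literal is added to (or subtracted from) a Date value:
    
        select * from tb_date where day between
        '2024-06-01' and cast('2024-06-01' as Date) + interval 48 hour
        order by day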
---
 .../clickhouse/CHSparkPlanExecApi.scala            |   8 ++
 .../gluten/expression/DateAddTransformer.scala     | 150 +++++++++++++++++++++
 .../GlutenClickHouseTPCHSaltNullParquetSuite.scala |  42 ++++++
 .../Parser/scalar_function_parser/timestampAdd.cpp |  14 +-
 .../gluten/backendsapi/SparkPlanExecApi.scala      |  12 ++
 .../gluten/expression/ExpressionConverter.scala    |  14 ++
 .../gluten/expression/ExpressionMappings.scala     |   1 +
 7 files changed, 236 insertions(+), 5 deletions(-)

diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
index 58bbf3fc2..547458d8c 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -897,6 +897,14 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
     GenericExpressionTransformer(substraitExprName, Seq(argument, function), expr)
   }
 
+  override def genDateAddTransformer(
+      attributeSeq: Seq[Attribute],
+      substraitExprName: String,
+      children: Seq[Expression],
+      expr: Expression): ExpressionTransformer = {
+    DateAddTransformer(attributeSeq, substraitExprName, children, expr).doTransform()
+  }
+
   override def genPreProjectForGenerate(generate: GenerateExec): SparkPlan = generate
 
   override def genPostProjectForGenerate(generate: GenerateExec): SparkPlan = generate
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/expression/DateAddTransformer.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/expression/DateAddTransformer.scala
new file mode 100644
index 000000000..687184985
--- /dev/null
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/expression/DateAddTransformer.scala
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.expression
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+case class DateAddTransformer(
+    attributeSeq: Seq[Attribute],
+    substraitExprName: String,
+    children: Seq[Expression],
+    expr: Expression)
+  extends Logging {
+
+  def doTransform(): ExpressionTransformer = {
+    expr match {
+      case dateAdd: DateAdd =>
+        doDateAddTransform()
+      case timeAdd: TimeAdd =>
+        doTimeAddTransform()
+      case other => doDefaultTransform()
+    }
+  }
+
+  private def unwrapExtractANSIIntervalDays(extractExpr: Expression): Expression = {
+    extractExpr match {
+      case extractDays: ExtractANSIIntervalDays =>
+        extractDays.child match {
+          case literal: Literal if literal.dataType.isInstanceOf[DayTimeIntervalType] =>
+            val (intVal, unitVal) = parseDayIntervalType(literal)
+            if (unitVal.equals("DAY")) {
+              Literal(intVal, IntegerType)
+            } else {
+              null
+            }
+          case _ => null
+        }
+      case _ => null
+    }
+  }
+
+  private def doDateAddTransform(): ExpressionTransformer = {
+    children(1) match {
+      case extractDays: ExtractANSIIntervalDays =>
+        val daysLiteral = unwrapExtractANSIIntervalDays(children(1))
+        if (daysLiteral != null) {
+          val daysExpr = LiteralTransformer(daysLiteral)
+          val dateExpr =
+            ExpressionConverter.replaceWithExpressionTransformer(children(0), attributeSeq)
+          GenericExpressionTransformer(substraitExprName, Seq(dateExpr, daysExpr), expr)
+        } else {
+          doDefaultTransform()
+        }
+      case minus: UnaryMinus =>
+        val child = minus.child match {
+          case extractDays: ExtractANSIIntervalDays =>
+            unwrapExtractANSIIntervalDays(extractDays)
+          case e => e
+        }
+        if (child != null) {
+          val newMinus = minus.withNewChildren(Seq(child))
+          val dateExpr =
+            ExpressionConverter.replaceWithExpressionTransformer(children(0), attributeSeq)
+          val minusExpr =
+            ExpressionConverter.replaceWithExpressionTransformer(newMinus, attributeSeq)
+          GenericExpressionTransformer(substraitExprName, Seq(dateExpr, minusExpr), expr)
+        } else {
+          doDefaultTransform()
+        }
+      case _ => doDefaultTransform()
+    }
+  }
+
+  private def doTimeAddTransform(): ExpressionTransformer = {
+    children(1) match {
+      case literal: Literal if literal.dataType.isInstanceOf[DayTimeIntervalType] =>
+        val (intVal, unitVal) = parseDayIntervalType(literal)
+        if (unitVal != null) {
+          val timeExpr =
+            ExpressionConverter.replaceWithExpressionTransformer(children(0), attributeSeq)
+
+          val intExpr = Literal(intVal, IntegerType)
+          val unitExpr = Literal(UTF8String.fromString(unitVal), StringType)
+          val newExpr = expr.withNewChildren(Seq(unitExpr, intExpr))
+          GenericExpressionTransformer(
+            substraitExprName,
+            Seq(LiteralTransformer(unitExpr), LiteralTransformer(intVal), timeExpr),
+            newExpr)
+        } else {
+          doDefaultTransform()
+        }
+      case minus: UnaryMinus =>
+        minus.child match {
+          case literal: Literal if literal.dataType.isInstanceOf[DayTimeIntervalType] =>
+            val (intVal, unitVal) = parseDayIntervalType(literal)
+            if (unitVal != null) {
+              val intExpr = minus.withNewChildren(Seq(Literal(intVal, IntegerType)))
+              val unitExpr = Literal(UTF8String.fromString(unitVal), StringType)
+              val newExpr = expr.withNewChildren(Seq(unitExpr, intExpr))
+              GenericExpressionTransformer(
+                substraitExprName,
+                Seq(LiteralTransformer(unitExpr), LiteralTransformer(intVal)),
+                newExpr)
+            } else {
+              doDefaultTransform()
+            }
+          case _ => doDefaultTransform()
+        }
+      case _ => doDefaultTransform()
+    }
+  }
+
+  private def doDefaultTransform(): ExpressionTransformer = {
+    // transform it in a generic way
+    val childrenTransformers =
+      children.map(ExpressionConverter.replaceWithExpressionTransformer(_, attributeSeq))
+    GenericExpressionTransformer(substraitExprName, childrenTransformers, expr)
+  }
+
+  private def parseDayIntervalType(literal: Literal): (Integer, String) = {
+    literal.dataType match {
+      case dayIntervalType: DayTimeIntervalType =>
+        val intVal = literal.value.asInstanceOf[Long]
+        (dayIntervalType.startField, dayIntervalType.endField) match {
+          case (0, 0) => ((literal.value.asInstanceOf[Long] / 1000000 / 3600 / 24).toInt, "DAY")
+          case (1, 1) => ((literal.value.asInstanceOf[Long] / 1000000 / 3600).toInt, "HOUR")
+          case (2, 2) => ((literal.value.asInstanceOf[Long] / 1000000L / 60L).toInt, "MINUTE")
+          case (3, 3) => (literal.value.asInstanceOf[Long].toInt / 1000000, "SECOND")
+          case _ => (0, null)
+        }
+      case _ => (0, null)
+    }
+  }
+}
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index bd8a37d92..36dd3778c 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -2727,5 +2727,47 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
     compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
     spark.sql("drop table test_tbl_4451")
   }
+
+  test("array functions date add") {
+    spark.sql("create table tb_date(day Date) using parquet")
+    spark.sql("""
+                |insert into tb_date values
+                |(cast('2024-06-01' as Date)),
+                |(cast('2024-06-02' as Date)),
+                |(cast('2024-06-03' as Date)),
+                |(cast('2024-06-04' as Date)),
+                |(cast('2024-06-05' as Date))
+                |""".stripMargin)
+    val sql1 = """
+                 |select * from tb_date where day between
+                 |'2024-06-01' and
+                 |cast('2024-06-01' as Date) + interval 2 day
+                 |order by day
+                 |""".stripMargin
+    compareResultsAgainstVanillaSpark(sql1, true, { _ => })
+    val sql2 = """
+                 |select * from tb_date where day between
+                 |'2024-06-01' and
+                 |cast('2024-06-01' as Date) + interval 48 hour
+                 |order by day
+                 |""".stripMargin
+    compareResultsAgainstVanillaSpark(sql2, true, { _ => })
+    val sql3 = """
+                 |select * from tb_date where day between
+                 |'2024-06-01' and
+                 |cast('2024-06-05' as Date) - interval 2 day
+                 |order by day
+                 |""".stripMargin
+    compareResultsAgainstVanillaSpark(sql3, true, { _ => })
+    val sql4 = """
+                 |select * from tb_date where day between
+                 |'2024-06-01' and
+                 |cast('2024-06-05' as Date) - interval 48 hour
+                 |order by day
+                 |""".stripMargin
+    compareResultsAgainstVanillaSpark(sql4, true, { _ => })
+
+    spark.sql("drop table tb_date")
+  }
 }
 // scalastyle:on line.size.limit
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/timestampAdd.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/timestampAdd.cpp
index 6e92a7b92..af81e2bd7 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/timestampAdd.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/timestampAdd.cpp
@@ -47,23 +47,27 @@ public:
     const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction & substrait_func, ActionsDAGPtr & actions_dag) const override
     {
         auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
-        if (parsed_args.size() != 4)
-            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires exactly four arguments", getName());
+        if (parsed_args.size() < 3)
+            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least three arguments", getName());
 
         const auto & unit_field = substrait_func.arguments().at(0);
         if (!unit_field.value().has_literal() || !unit_field.value().literal().has_string())
             throw Exception(
                 ErrorCodes::BAD_ARGUMENTS, "Unsupported unit argument, should be a string literal, but: {}", unit_field.DebugString());
 
-        const auto & timezone_field = substrait_func.arguments().at(3);
-        if (!timezone_field.value().has_literal() || !timezone_field.value().literal().has_string())
+        String timezone;
+        if (parsed_args.size() == 4)
+        {
+            const auto & timezone_field = substrait_func.arguments().at(3);
+            if (!timezone_field.value().has_literal() || !timezone_field.value().literal().has_string())
             throw Exception(
                 ErrorCodes::BAD_ARGUMENTS,
                 "Unsupported timezone_field argument, should be a string 
literal, but: {}",
                 timezone_field.DebugString());
+            timezone = timezone_field.value().literal().string();
+        }
 
         const auto & unit = Poco::toUpper(unit_field.value().literal().string());
-        auto timezone = timezone_field.value().literal().string();
 
         std::string ch_function_name;
         if (unit == "MICROSECOND")
diff --git a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 88af26228..58a08192d 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -322,6 +322,18 @@ trait SparkPlanExecApi {
     throw new GlutenNotSupportException("PreciseTimestampConversion is not supported")
   }
 
+  // For date_add(cast('2001-01-01' as Date), interval 1 day), backends may handle it in different
+  // ways
+  def genDateAddTransformer(
+      attributeSeq: Seq[Attribute],
+      substraitExprName: String,
+      children: Seq[Expression],
+      expr: Expression): ExpressionTransformer = {
+    val childrenTransformers =
+      children.map(ExpressionConverter.replaceWithExpressionTransformer(_, attributeSeq))
+    GenericExpressionTransformer(substraitExprName, childrenTransformers, expr)
+  }
+
   /**
    * Generate ShuffleDependency for ColumnarShuffleExchangeExec.
    *
diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index c285c1194..de007167f 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -686,6 +686,20 @@ object ExpressionConverter extends SQLConfHelper with Logging {
         LiteralTransformer(Literal(Math.E))
       case p: Pi =>
         LiteralTransformer(Literal(Math.PI))
+      case dateAdd: DateAdd =>
+        BackendsApiManager.getSparkPlanExecApiInstance.genDateAddTransformer(
+          attributeSeq,
+          substraitExprName,
+          dateAdd.children,
+          dateAdd
+        )
+      case timeAdd: TimeAdd =>
+        BackendsApiManager.getSparkPlanExecApiInstance.genDateAddTransformer(
+          attributeSeq,
+          substraitExprName,
+          timeAdd.children,
+          timeAdd
+        )
       case expr =>
         GenericExpressionTransformer(
           substraitExprName,
diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index 77e85b354..9012ca804 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -177,6 +177,7 @@ object ExpressionMappings {
     Sig[Second](EXTRACT),
     Sig[FromUnixTime](FROM_UNIXTIME),
     Sig[DateAdd](DATE_ADD),
+    Sig[TimeAdd](TIMESTAMP_ADD),
     Sig[DateSub](DATE_SUB),
     Sig[DateDiff](DATE_DIFF),
     Sig[ToUnixTimestamp](TO_UNIX_TIMESTAMP),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
