This is an automated email from the ASF dual-hosted git repository.

taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new cf010ddd90 [GLUTEN-7100][CH] support function 
timestamp_seconds/timestamp_millis/timestamp_micros (#7102)
cf010ddd90 is described below

commit cf010ddd90b663990016a44187133c1ee0dc9e23
Author: 李扬 <[email protected]>
AuthorDate: Wed Sep 11 14:18:49 2024 +0800

    [GLUTEN-7100][CH] support function 
timestamp_seconds/timestamp_millis/timestamp_micros (#7102)
    
    * support function timestamp_xxx
    
    * fix style
    
    * disable ut TIMESTAMP_SECONDS because of 
https://github.com/ClickHouse/ClickHouse/issues/69280
    
    * fix all failed uts
    
    * fix failed uts
---
 .../org/apache/gluten/utils/CHExpressionUtil.scala |  2 -
 .../execution/GlutenFunctionValidateSuite.scala    | 12 +++++
 .../CommonScalarFunctionParser.cpp                 |  2 +
 .../scalar_function_parser/timestampSeconds.cpp    | 58 ++++++++++++++++++++++
 .../gluten/expression/ExpressionMappings.scala     |  1 +
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  8 +--
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  8 +--
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  8 +--
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  8 +--
 .../apache/gluten/expression/ExpressionNames.scala |  1 +
 10 files changed, 90 insertions(+), 18 deletions(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index c33fd4ee39..0b8e5cb536 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -214,8 +214,6 @@ object CHExpressionUtil {
     REGR_SXY -> DefaultValidator(),
     TO_UTC_TIMESTAMP -> UtcTimestampValidator(),
     FROM_UTC_TIMESTAMP -> UtcTimestampValidator(),
-    TIMESTAMP_MILLIS -> DefaultValidator(),
-    TIMESTAMP_MICROS -> DefaultValidator(),
     STACK -> DefaultValidator(),
     TRANSFORM_KEYS -> DefaultValidator(),
     TRANSFORM_VALUES -> DefaultValidator(),
diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
index e2989b463c..2bbbc00eb1 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
@@ -788,4 +788,16 @@ class GlutenFunctionValidateSuite extends 
GlutenClickHouseWholeStageTransformerS
                 |""".stripMargin
     runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
   }
+
+  test("test function timestamp_seconds/timestamp_millis/timestamp_micros") {
+    val sql = """
+                |SELECT
+                |  id,
+                |  timestamp_seconds(1725453790 + id) as ts_seconds,
+                |  timestamp_millis(1725453790123 + id) as ts_millis,
+                |  timestamp_micros(1725453790123456 + id) as ts_micros
+                |from range(10);
+                |""".stripMargin
+    runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
+  }
 }
diff --git 
a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
 
b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
index d0780e05ea..37282104c6 100644
--- 
a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
+++ 
b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
@@ -159,6 +159,8 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixSeconds, 
unix_seconds, toUnixTimestam
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixDate, unix_date, toInt32);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixMillis, unix_millis, 
toUnixTimestamp64Milli);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixMicros, unix_micros, 
toUnixTimestamp64Micro);
+REGISTER_COMMON_SCALAR_FUNCTION_PARSER(TimestampMillis, timestamp_millis, 
fromUnixTimestamp64Milli);
+REGISTER_COMMON_SCALAR_FUNCTION_PARSER(TimestampMicros, timestamp_micros, 
fromUnixTimestamp64Micro);
 
 // array functions
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Array, array, array);
diff --git 
a/cpp-ch/local-engine/Parser/scalar_function_parser/timestampSeconds.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/timestampSeconds.cpp
new file mode 100644
index 0000000000..d1bcb5bb35
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/timestampSeconds.cpp
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <DataTypes/IDataType.h>
+#include <Parser/FunctionParser.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int BAD_ARGUMENTS;
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+}
+
+namespace local_engine
+{
+
+class FunctionParserTimestampSeconds : public FunctionParser
+{
+public:
+    explicit FunctionParserTimestampSeconds(SerializedPlanParser * 
plan_parser_) : FunctionParser(plan_parser_) { }
+    ~FunctionParserTimestampSeconds() override = default;
+
+    static constexpr auto name = "timestamp_seconds";
+
+    String getName() const override { return name; }
+
+    const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction 
& substrait_func, ActionsDAG & actions_dag) const override
+    {
+        /// Parse timestamp_seconds(expr) as toDateTime64(expr, 6)
+        auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
+        if (parsed_args.size() != 1)
+            throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, 
"Function {} requires exactly one argument", getName());
+
+        const auto * arg = parsed_args[0];
+        const auto * precision_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DataTypeUInt8>(), 6);
+        const auto * to_datetime64_node = toFunctionNode(actions_dag, 
"toDateTime64", {arg, precision_node});
+        return convertNodeTypeIfNeeded(substrait_func, to_datetime64_node, 
actions_dag);
+    }
+};
+
+static FunctionParserRegister<FunctionParserTimestampSeconds> 
register_timestamp_seconds;
+}
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index 38f9de629a..e2f7971ad6 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -199,6 +199,7 @@ object ExpressionMappings {
     Sig[UnixSeconds](UNIX_SECONDS),
     Sig[UnixMillis](UNIX_MILLIS),
     Sig[UnixMicros](UNIX_MICROS),
+    Sig[SecondsToTimestamp](TIMESTAMP_SECONDS),
     Sig[MillisToTimestamp](TIMESTAMP_MILLIS),
     Sig[MicrosToTimestamp](TIMESTAMP_MICROS),
     Sig[PreciseTimestampConversion](PRECYSE_TIMESTAMP_CONVERSION),
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index b527a8ec1f..a3bfafe046 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -431,6 +431,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("analyze empty table")
     .exclude("analyze column command - result verification")
     .exclude("column stats collection for null columns")
+    .exclude("store and retrieve column stats in different time zones")
+    .excludeGlutenTest("store and retrieve column stats in different time 
zones")
   enableSuite[GlutenStringFunctionsSuite]
     .exclude("string regex_replace / regex_extract")
     .exclude("string overlay function")
@@ -740,13 +742,11 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is 
missing")
     .exclude("DATE_FROM_UNIX_DATE")
     .exclude("UNIX_SECONDS")
-    .exclude("TIMESTAMP_SECONDS")
-    .exclude("TIMESTAMP_MILLIS")
-    .exclude("TIMESTAMP_MICROS")
+    .exclude("TIMESTAMP_SECONDS") // refer to 
https://github.com/ClickHouse/ClickHouse/issues/69280
+    .exclude("TIMESTAMP_MICROS") // refer to 
https://github.com/apache/incubator-gluten/issues/7127
     .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with 
parseError")
     .exclude("SPARK-34739,SPARK-35889: add a year-month interval to a 
timestamp")
     .exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp")
-    .excludeGlutenTest("TIMESTAMP_MICROS")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
     .excludeGlutenTest("Hour")
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 110dbffc73..35fe6108a3 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -489,6 +489,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("analyze empty table")
     .exclude("analyze column command - result verification")
     .exclude("column stats collection for null columns")
+    .exclude("store and retrieve column stats in different time zones")
+    .excludeGlutenTest("store and retrieve column stats in different time 
zones")
   enableSuite[GlutenStringFunctionsSuite]
     .exclude("string regex_replace / regex_extract")
     .exclude("string overlay function")
@@ -769,16 +771,14 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is 
missing")
     .exclude("DATE_FROM_UNIX_DATE")
     .exclude("UNIX_SECONDS")
-    .exclude("TIMESTAMP_SECONDS")
-    .exclude("TIMESTAMP_MILLIS")
-    .exclude("TIMESTAMP_MICROS")
+    .exclude("TIMESTAMP_SECONDS") // refer to 
https://github.com/ClickHouse/ClickHouse/issues/69280
+    .exclude("TIMESTAMP_MICROS") // refer to 
https://github.com/apache/incubator-gluten/issues/7127
     .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with 
parseError")
     .exclude("SPARK-34739,SPARK-35889: add a year-month interval to a 
timestamp")
     .exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp")
     .exclude("SPARK-37552: convert a timestamp_ntz to another time zone")
     .exclude("SPARK-38195: add a quantity of interval units to a timestamp")
     .exclude("SPARK-38284: difference between two timestamps in units")
-    .excludeGlutenTest("TIMESTAMP_MICROS")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
     .excludeGlutenTest("Hour")
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index d478adbb32..cd68dfc25a 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -488,6 +488,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("analyze empty table")
     .exclude("analyze column command - result verification")
     .exclude("column stats collection for null columns")
+    .exclude("store and retrieve column stats in different time zones")
+    .excludeGlutenTest("store and retrieve column stats in different time 
zones")
   enableSuite[GlutenStringFunctionsSuite]
     .exclude("string regex_replace / regex_extract")
     .exclude("string overlay function")
@@ -657,16 +659,14 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is 
missing")
     .exclude("DATE_FROM_UNIX_DATE")
     .exclude("UNIX_SECONDS")
-    .exclude("TIMESTAMP_SECONDS")
-    .exclude("TIMESTAMP_MILLIS")
-    .exclude("TIMESTAMP_MICROS")
+    .exclude("TIMESTAMP_SECONDS") // refer to 
https://github.com/ClickHouse/ClickHouse/issues/69280
+    .exclude("TIMESTAMP_MICROS") // refer to 
https://github.com/apache/incubator-gluten/issues/7127
     .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with 
parseError")
     .exclude("SPARK-34739,SPARK-35889: add a year-month interval to a 
timestamp")
     .exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp")
     .exclude("SPARK-37552: convert a timestamp_ntz to another time zone")
     .exclude("SPARK-38195: add a quantity of interval units to a timestamp")
     .exclude("SPARK-38284: difference between two timestamps in units")
-    .excludeGlutenTest("TIMESTAMP_MICROS")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
     .excludeGlutenTest("Hour")
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 089f72cd11..7e73ae9c17 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -488,6 +488,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("analyze empty table")
     .exclude("analyze column command - result verification")
     .exclude("column stats collection for null columns")
+    .exclude("store and retrieve column stats in different time zones")
+    .excludeGlutenTest("store and retrieve column stats in different time 
zones")
   enableSuite[GlutenStringFunctionsSuite]
     .exclude("string regex_replace / regex_extract")
     .exclude("string overlay function")
@@ -657,16 +659,14 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is 
missing")
     .exclude("DATE_FROM_UNIX_DATE")
     .exclude("UNIX_SECONDS")
-    .exclude("TIMESTAMP_SECONDS")
-    .exclude("TIMESTAMP_MILLIS")
-    .exclude("TIMESTAMP_MICROS")
+    .exclude("TIMESTAMP_SECONDS") // refer to 
https://github.com/ClickHouse/ClickHouse/issues/69280
+    .exclude("TIMESTAMP_MICROS") // refer to 
https://github.com/apache/incubator-gluten/issues/7127
     .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with 
parseError")
     .exclude("SPARK-34739,SPARK-35889: add a year-month interval to a 
timestamp")
     .exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp")
     .exclude("SPARK-37552: convert a timestamp_ntz to another time zone")
     .exclude("SPARK-38195: add a quantity of interval units to a timestamp")
     .exclude("SPARK-38284: difference between two timestamps in units")
-    .excludeGlutenTest("TIMESTAMP_MICROS")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
     .excludeGlutenTest("Hour")
diff --git 
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
 
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index f198bb7e17..c45f0b2d4e 100644
--- 
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ 
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -223,6 +223,7 @@ object ExpressionNames {
   final val UNIX_SECONDS = "unix_seconds"
   final val UNIX_MILLIS = "unix_millis"
   final val UNIX_MICROS = "unix_micros"
+  final val TIMESTAMP_SECONDS = "timestamp_seconds"
   final val TIMESTAMP_MILLIS = "timestamp_millis"
   final val TIMESTAMP_MICROS = "timestamp_micros"
   final val PRECYSE_TIMESTAMP_CONVERSION = "precise_timestamp_conversion"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to