This is an automated email from the ASF dual-hosted git repository.
taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new cf010ddd90 [GLUTEN-7100][CH] support function
timestamp_seconds/timestamp_millis/timestamp_micros (#7102)
cf010ddd90 is described below
commit cf010ddd90b663990016a44187133c1ee0dc9e23
Author: 李扬 <[email protected]>
AuthorDate: Wed Sep 11 14:18:49 2024 +0800
[GLUTEN-7100][CH] support function
timestamp_seconds/timestamp_millis/timestamp_micros (#7102)
* support function timestamp_xxx
* fix style
* disable ut TIMESTAMP_SECONDS because of
https://github.com/ClickHouse/ClickHouse/issues/69280
* fix all failed uts
* fix failed uts
---
.../org/apache/gluten/utils/CHExpressionUtil.scala | 2 -
.../execution/GlutenFunctionValidateSuite.scala | 12 +++++
.../CommonScalarFunctionParser.cpp | 2 +
.../scalar_function_parser/timestampSeconds.cpp | 58 ++++++++++++++++++++++
.../gluten/expression/ExpressionMappings.scala | 1 +
.../utils/clickhouse/ClickHouseTestSettings.scala | 8 +--
.../utils/clickhouse/ClickHouseTestSettings.scala | 8 +--
.../utils/clickhouse/ClickHouseTestSettings.scala | 8 +--
.../utils/clickhouse/ClickHouseTestSettings.scala | 8 +--
.../apache/gluten/expression/ExpressionNames.scala | 1 +
10 files changed, 90 insertions(+), 18 deletions(-)
diff --git
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index c33fd4ee39..0b8e5cb536 100644
---
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -214,8 +214,6 @@ object CHExpressionUtil {
REGR_SXY -> DefaultValidator(),
TO_UTC_TIMESTAMP -> UtcTimestampValidator(),
FROM_UTC_TIMESTAMP -> UtcTimestampValidator(),
- TIMESTAMP_MILLIS -> DefaultValidator(),
- TIMESTAMP_MICROS -> DefaultValidator(),
STACK -> DefaultValidator(),
TRANSFORM_KEYS -> DefaultValidator(),
TRANSFORM_VALUES -> DefaultValidator(),
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
index e2989b463c..2bbbc00eb1 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
@@ -788,4 +788,16 @@ class GlutenFunctionValidateSuite extends
GlutenClickHouseWholeStageTransformerS
|""".stripMargin
runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
}
+
+ test("test function timestamp_seconds/timestamp_millis/timestamp_micros") {
+ val sql = """
+ |SELECT
+ | id,
+ | timestamp_seconds(1725453790 + id) as ts_seconds,
+ | timestamp_millis(1725453790123 + id) as ts_millis,
+ | timestamp_micros(1725453790123456 + id) as ts_micros
+ |from range(10);
+ |""".stripMargin
+ runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
+ }
}
diff --git
a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
index d0780e05ea..37282104c6 100644
---
a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
+++
b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
@@ -159,6 +159,8 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixSeconds,
unix_seconds, toUnixTimestam
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixDate, unix_date, toInt32);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixMillis, unix_millis,
toUnixTimestamp64Milli);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(UnixMicros, unix_micros,
toUnixTimestamp64Micro);
+REGISTER_COMMON_SCALAR_FUNCTION_PARSER(TimestampMillis, timestamp_millis,
fromUnixTimestamp64Milli);
+REGISTER_COMMON_SCALAR_FUNCTION_PARSER(TimestampMicros, timestamp_micros,
fromUnixTimestamp64Micro);
// array functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Array, array, array);
diff --git
a/cpp-ch/local-engine/Parser/scalar_function_parser/timestampSeconds.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/timestampSeconds.cpp
new file mode 100644
index 0000000000..d1bcb5bb35
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/timestampSeconds.cpp
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <DataTypes/IDataType.h>
+#include <Parser/FunctionParser.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int BAD_ARGUMENTS;
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+}
+
+namespace local_engine
+{
+
+class FunctionParserTimestampSeconds : public FunctionParser
+{
+public:
+ explicit FunctionParserTimestampSeconds(SerializedPlanParser *
plan_parser_) : FunctionParser(plan_parser_) { }
+ ~FunctionParserTimestampSeconds() override = default;
+
+ static constexpr auto name = "timestamp_seconds";
+
+ String getName() const override { return name; }
+
+ const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction
& substrait_func, ActionsDAG & actions_dag) const override
+ {
+ /// Parse timestamp_seconds(expr) as toDateTime64(expr, 6)
+ auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
+ if (parsed_args.size() != 1)
+ throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} requires exactly one argument", getName());
+
+ const auto * arg = parsed_args[0];
+ const auto * precision_node = addColumnToActionsDAG(actions_dag,
std::make_shared<DataTypeUInt8>(), 6);
+ const auto * to_datetime64_node = toFunctionNode(actions_dag,
"toDateTime64", {arg, precision_node});
+ return convertNodeTypeIfNeeded(substrait_func, to_datetime64_node,
actions_dag);
+ }
+};
+
+static FunctionParserRegister<FunctionParserTimestampSeconds>
register_timestamp_seconds;
+}
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index 38f9de629a..e2f7971ad6 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -199,6 +199,7 @@ object ExpressionMappings {
Sig[UnixSeconds](UNIX_SECONDS),
Sig[UnixMillis](UNIX_MILLIS),
Sig[UnixMicros](UNIX_MICROS),
+ Sig[SecondsToTimestamp](TIMESTAMP_SECONDS),
Sig[MillisToTimestamp](TIMESTAMP_MILLIS),
Sig[MicrosToTimestamp](TIMESTAMP_MICROS),
Sig[PreciseTimestampConversion](PRECYSE_TIMESTAMP_CONVERSION),
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index b527a8ec1f..a3bfafe046 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -431,6 +431,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("analyze empty table")
.exclude("analyze column command - result verification")
.exclude("column stats collection for null columns")
+ .exclude("store and retrieve column stats in different time zones")
+ .excludeGlutenTest("store and retrieve column stats in different time
zones")
enableSuite[GlutenStringFunctionsSuite]
.exclude("string regex_replace / regex_extract")
.exclude("string overlay function")
@@ -740,13 +742,11 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is
missing")
.exclude("DATE_FROM_UNIX_DATE")
.exclude("UNIX_SECONDS")
- .exclude("TIMESTAMP_SECONDS")
- .exclude("TIMESTAMP_MILLIS")
- .exclude("TIMESTAMP_MICROS")
+ .exclude("TIMESTAMP_SECONDS") // refer to
https://github.com/ClickHouse/ClickHouse/issues/69280
+ .exclude("TIMESTAMP_MICROS") // refer to
https://github.com/apache/incubator-gluten/issues/7127
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with
parseError")
.exclude("SPARK-34739,SPARK-35889: add a year-month interval to a
timestamp")
.exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp")
- .excludeGlutenTest("TIMESTAMP_MICROS")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
.excludeGlutenTest("Hour")
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 110dbffc73..35fe6108a3 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -489,6 +489,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("analyze empty table")
.exclude("analyze column command - result verification")
.exclude("column stats collection for null columns")
+ .exclude("store and retrieve column stats in different time zones")
+ .excludeGlutenTest("store and retrieve column stats in different time
zones")
enableSuite[GlutenStringFunctionsSuite]
.exclude("string regex_replace / regex_extract")
.exclude("string overlay function")
@@ -769,16 +771,14 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is
missing")
.exclude("DATE_FROM_UNIX_DATE")
.exclude("UNIX_SECONDS")
- .exclude("TIMESTAMP_SECONDS")
- .exclude("TIMESTAMP_MILLIS")
- .exclude("TIMESTAMP_MICROS")
+ .exclude("TIMESTAMP_SECONDS") // refer to
https://github.com/ClickHouse/ClickHouse/issues/69280
+ .exclude("TIMESTAMP_MICROS") // refer to
https://github.com/apache/incubator-gluten/issues/7127
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with
parseError")
.exclude("SPARK-34739,SPARK-35889: add a year-month interval to a
timestamp")
.exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp")
.exclude("SPARK-37552: convert a timestamp_ntz to another time zone")
.exclude("SPARK-38195: add a quantity of interval units to a timestamp")
.exclude("SPARK-38284: difference between two timestamps in units")
- .excludeGlutenTest("TIMESTAMP_MICROS")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
.excludeGlutenTest("Hour")
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index d478adbb32..cd68dfc25a 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -488,6 +488,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("analyze empty table")
.exclude("analyze column command - result verification")
.exclude("column stats collection for null columns")
+ .exclude("store and retrieve column stats in different time zones")
+ .excludeGlutenTest("store and retrieve column stats in different time
zones")
enableSuite[GlutenStringFunctionsSuite]
.exclude("string regex_replace / regex_extract")
.exclude("string overlay function")
@@ -657,16 +659,14 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is
missing")
.exclude("DATE_FROM_UNIX_DATE")
.exclude("UNIX_SECONDS")
- .exclude("TIMESTAMP_SECONDS")
- .exclude("TIMESTAMP_MILLIS")
- .exclude("TIMESTAMP_MICROS")
+ .exclude("TIMESTAMP_SECONDS") // refer to
https://github.com/ClickHouse/ClickHouse/issues/69280
+ .exclude("TIMESTAMP_MICROS") // refer to
https://github.com/apache/incubator-gluten/issues/7127
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with
parseError")
.exclude("SPARK-34739,SPARK-35889: add a year-month interval to a
timestamp")
.exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp")
.exclude("SPARK-37552: convert a timestamp_ntz to another time zone")
.exclude("SPARK-38195: add a quantity of interval units to a timestamp")
.exclude("SPARK-38284: difference between two timestamps in units")
- .excludeGlutenTest("TIMESTAMP_MICROS")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
.excludeGlutenTest("Hour")
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 089f72cd11..7e73ae9c17 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -488,6 +488,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("analyze empty table")
.exclude("analyze column command - result verification")
.exclude("column stats collection for null columns")
+ .exclude("store and retrieve column stats in different time zones")
+ .excludeGlutenTest("store and retrieve column stats in different time
zones")
enableSuite[GlutenStringFunctionsSuite]
.exclude("string regex_replace / regex_extract")
.exclude("string overlay function")
@@ -657,16 +659,14 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-31896: Handle am-pm timestamp parsing when hour is
missing")
.exclude("DATE_FROM_UNIX_DATE")
.exclude("UNIX_SECONDS")
- .exclude("TIMESTAMP_SECONDS")
- .exclude("TIMESTAMP_MILLIS")
- .exclude("TIMESTAMP_MICROS")
+ .exclude("TIMESTAMP_SECONDS") // refer to
https://github.com/ClickHouse/ClickHouse/issues/69280
+ .exclude("TIMESTAMP_MICROS") // refer to
https://github.com/apache/incubator-gluten/issues/7127
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with
parseError")
.exclude("SPARK-34739,SPARK-35889: add a year-month interval to a
timestamp")
.exclude("SPARK-34761,SPARK-35889: add a day-time interval to a timestamp")
.exclude("SPARK-37552: convert a timestamp_ntz to another time zone")
.exclude("SPARK-38195: add a quantity of interval units to a timestamp")
.exclude("SPARK-38284: difference between two timestamps in units")
- .excludeGlutenTest("TIMESTAMP_MICROS")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
.excludeGlutenTest("Hour")
diff --git
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index f198bb7e17..c45f0b2d4e 100644
---
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -223,6 +223,7 @@ object ExpressionNames {
final val UNIX_SECONDS = "unix_seconds"
final val UNIX_MILLIS = "unix_millis"
final val UNIX_MICROS = "unix_micros"
+ final val TIMESTAMP_SECONDS = "timestamp_seconds"
final val TIMESTAMP_MILLIS = "timestamp_millis"
final val TIMESTAMP_MICROS = "timestamp_micros"
final val PRECYSE_TIMESTAMP_CONVERSION = "precise_timestamp_conversion"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]