This is an automated email from the ASF dual-hosted git repository.
liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new b37a6e426 [GLUTEN-5827][CH]support utc timestamp transfrom (#5828)
b37a6e426 is described below
commit b37a6e42681fa23b94337e7646153e5640e474d7
Author: KevinyhZou <[email protected]>
AuthorDate: Tue Jun 11 16:56:32 2024 +0800
[GLUTEN-5827][CH]support utc timestamp transfrom (#5828)
What changes were proposed in this pull request?
(Please fill in changes proposed in this fix)
(Fixes: #5827)
Support the to_utc_timestamp / from_utc_timestamp functions.
Convert offset-style time zones such as '+08:00' and '-08:00' to their
GMT-prefixed form (GMT+08:00, GMT-08:00) when they are set in
spark.sql.session.timezone or passed as the time zone parameter of
to_utc_timestamp / from_utc_timestamp.
How was this patch tested?
Spark unit tests (UT)
---
.../org/apache/gluten/utils/CHExpressionUtil.scala | 13 ++++-
cpp-ch/local-engine/Common/CHUtil.cpp | 18 ++++---
cpp-ch/local-engine/Common/CHUtil.h | 1 +
cpp-ch/local-engine/Parser/SerializedPlanParser.h | 2 +-
.../scalar_function_parser/fromUtcTimestamp.cpp | 35 +++++++++++++
.../scalar_function_parser/toUtcTimestamp.cpp | 35 +++++++++++++
.../scalar_function_parser/utcTimestampTransform.h | 61 ++++++++++++++++++++++
.../utils/clickhouse/ClickHouseTestSettings.scala | 2 -
.../utils/clickhouse/ClickHouseTestSettings.scala | 2 -
.../utils/clickhouse/ClickHouseTestSettings.scala | 2 -
.../utils/clickhouse/ClickHouseTestSettings.scala | 2 -
11 files changed, 156 insertions(+), 17 deletions(-)
diff --git
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index 94d289594..d47523c0f 100644
---
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -44,6 +44,15 @@ case class SequenceValidator() extends FunctionValidator {
}
}
+case class UtcTimestampValidator() extends FunctionValidator {
+ override def doValidate(expr: Expression): Boolean = expr match {
+ // CH backend does not support non-const timezone parameter
+ case t: ToUTCTimestamp => t.children(1).isInstanceOf[Literal]
+ case f: FromUTCTimestamp => f.children(1).isInstanceOf[Literal]
+ case _ => false
+ }
+}
+
case class UnixTimeStampValidator() extends FunctionValidator {
final val DATE_TYPE = "date"
@@ -194,8 +203,8 @@ object CHExpressionUtil {
REGR_SLOPE -> DefaultValidator(),
REGR_INTERCEPT -> DefaultValidator(),
REGR_SXY -> DefaultValidator(),
- TO_UTC_TIMESTAMP -> DefaultValidator(),
- FROM_UTC_TIMESTAMP -> DefaultValidator(),
+ TO_UTC_TIMESTAMP -> UtcTimestampValidator(),
+ FROM_UTC_TIMESTAMP -> UtcTimestampValidator(),
UNIX_MILLIS -> DefaultValidator(),
UNIX_MICROS -> DefaultValidator(),
TIMESTAMP_MILLIS -> DefaultValidator(),
diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp
b/cpp-ch/local-engine/Common/CHUtil.cpp
index 62b42f981..fa6124cf0 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -586,7 +586,7 @@ void
BackendInitializerUtil::initEnvs(DB::Context::ConfigurationPtr config)
if (config->has("timezone"))
{
const std::string config_timezone = config->getString("timezone");
- const String mapped_timezone =
DateLUT::mappingForJavaTimezone(config_timezone);
+ const String mapped_timezone =
DateTimeUtil::convertTimeZone(config_timezone);
if (0 != setenv("TZ", mapped_timezone.data(), 1)) //
NOLINT(concurrency-mt-unsafe) // ok if not called concurrently with other
setenv/getenv
throw Poco::Exception("Cannot setenv TZ variable");
@@ -659,11 +659,7 @@ void
BackendInitializerUtil::initSettings(std::map<std::string, std::string> & b
}
else if (key == SPARK_SESSION_TIME_ZONE)
{
- String time_zone_val = value;
- /// Convert timezone ID like '+8:00' to GMT+8:00
- if (value.starts_with("+") || value.starts_with("-"))
- time_zone_val = "GMT" + value;
- time_zone_val = DateLUT::mappingForJavaTimezone(time_zone_val);
+ String time_zone_val = DateTimeUtil::convertTimeZone(value);
settings.set("session_timezone", time_zone_val);
LOG_DEBUG(&Poco::Logger::get("CHUtil"), "Set settings key:{}
value:{}", "session_timezone", time_zone_val);
}
@@ -937,6 +933,16 @@ Int64 DateTimeUtil::currentTimeMillis()
return timeInMilliseconds(std::chrono::system_clock::now());
}
+String DateTimeUtil::convertTimeZone(const String & time_zone)
+{
+ String res = time_zone;
+ /// Convert timezone ID like '+08:00' to GMT+08:00
+ if (time_zone.starts_with("+") || time_zone.starts_with("-"))
+ res = "GMT" + time_zone;
+ res = DateLUT::mappingForJavaTimezone(res);
+ return res;
+}
+
UInt64 MemoryUtil::getCurrentMemoryUsage(size_t depth)
{
Int64 current_memory_usage = 0;
diff --git a/cpp-ch/local-engine/Common/CHUtil.h
b/cpp-ch/local-engine/Common/CHUtil.h
index 2ef3c6ef9..50de9461f 100644
--- a/cpp-ch/local-engine/Common/CHUtil.h
+++ b/cpp-ch/local-engine/Common/CHUtil.h
@@ -225,6 +225,7 @@ class DateTimeUtil
{
public:
static Int64 currentTimeMillis();
+ static String convertTimeZone(const String & time_zone);
};
class MemoryUtil
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
index d0a16ec71..c79598c59 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
@@ -57,7 +57,7 @@ static const std::map<std::string, std::string>
SCALAR_FUNCTIONS
{"get_timestamp", "parseDateTimeInJodaSyntaxOrNull"}, // for spark
function: to_date/to_timestamp
{"quarter", "toQuarter"},
{"to_unix_timestamp", "parseDateTimeInJodaSyntaxOrNull"},
- // {"unix_timestamp", "toUnixTimestamp"},
+ //{"unix_timestamp", "toUnixTimestamp"},
{"date_format", "formatDateTimeInJodaSyntax"},
{"timestamp_add", "timestamp_add"},
diff --git
a/cpp-ch/local-engine/Parser/scalar_function_parser/fromUtcTimestamp.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/fromUtcTimestamp.cpp
new file mode 100644
index 000000000..8d2323105
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/fromUtcTimestamp.cpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Parser/scalar_function_parser/utcTimestampTransform.h>
+
+namespace local_engine
+{
+
+class FunctionParserFromUtcTimestamp : public
FunctionParserUtcTimestampTransform
+{
+public:
+ explicit FunctionParserFromUtcTimestamp(SerializedPlanParser *
plan_parser_) : FunctionParserUtcTimestampTransform(plan_parser_) { }
+ ~FunctionParserFromUtcTimestamp() = default;
+
+ static constexpr auto name = "from_utc_timestamp";
+ String getCHFunctionName(const substrait::Expression_ScalarFunction &)
const override { return "from_utc_timestamp"; }
+ String getName() const override { return "from_utc_timestamp"; }
+};
+
+static FunctionParserRegister<FunctionParserFromUtcTimestamp> fromUtcTimestamp;
+}
diff --git
a/cpp-ch/local-engine/Parser/scalar_function_parser/toUtcTimestamp.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/toUtcTimestamp.cpp
new file mode 100644
index 000000000..4b04942ba
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/toUtcTimestamp.cpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Parser/scalar_function_parser/utcTimestampTransform.h>
+
+namespace local_engine
+{
+
+class FunctionParserToUtcTimestamp : public FunctionParserUtcTimestampTransform
+{
+public:
+ explicit FunctionParserToUtcTimestamp(SerializedPlanParser * plan_parser_)
: FunctionParserUtcTimestampTransform(plan_parser_) { }
+ ~FunctionParserToUtcTimestamp() = default;
+
+ static constexpr auto name = "to_utc_timestamp";
+ String getCHFunctionName(const substrait::Expression_ScalarFunction &)
const override { return "to_utc_timestamp"; }
+ String getName() const override { return "to_utc_timestamp"; }
+};
+
+static FunctionParserRegister<FunctionParserToUtcTimestamp> toUtcTimestamp;
+}
diff --git
a/cpp-ch/local-engine/Parser/scalar_function_parser/utcTimestampTransform.h
b/cpp-ch/local-engine/Parser/scalar_function_parser/utcTimestampTransform.h
new file mode 100644
index 000000000..87ea19024
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/utcTimestampTransform.h
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Parser/FunctionParser.h>
+#include <Common/CHUtil.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+}
+
+namespace local_engine
+{
+
+class FunctionParserUtcTimestampTransform : public FunctionParser
+{
+public:
+ explicit FunctionParserUtcTimestampTransform(SerializedPlanParser *
plan_parser_) : FunctionParser(plan_parser_) { }
+ ~FunctionParserUtcTimestampTransform() override = default;
+
+ const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction
& substrait_func, ActionsDAGPtr & actions_dag) const override
+ {
+ /// Convert timezone value to clickhouse backend supported, i.e. GMT+8
-> Etc/GMT-8, +08:00 -> Etc/GMT-8
+ if (substrait_func.arguments_size() != 2)
+ throw
DB::Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s
must have 2 arguments", getName());
+
+ const substrait::Expression & arg1 =
substrait_func.arguments()[1].value();
+ if (!arg1.has_literal() || !arg1.literal().has_string())
+ throw DB::Exception(DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {}'s 2nd argument should be string literal", getName());
+
+ const String & arg1_literal = arg1.literal().string();
+ String time_zone_val = DateTimeUtil::convertTimeZone(arg1_literal);
+ auto parsed_args = parseFunctionArguments(substrait_func, "",
actions_dag);
+ auto nullable_string_type =
DB::makeNullable(std::make_shared<DB::DataTypeString>());
+ const auto * time_zone_node = addColumnToActionsDAG(actions_dag,
nullable_string_type, time_zone_val);
+ const auto * result_node = toFunctionNode(actions_dag,
getCHFunctionName(substrait_func), {parsed_args[0], time_zone_node});
+ return convertNodeTypeIfNeeded(substrait_func, result_node,
actions_dag);
+ }
+};
+}
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 2c34baa63..a8a5a1e41 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -300,9 +300,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and
minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
- .exclude("to_utc_timestamp with literal zone")
.exclude("to_utc_timestamp with column zone")
- .exclude("from_utc_timestamp with literal zone")
.exclude("from_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index bb782fde3..cb33f0025 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -326,9 +326,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and
minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
- .exclude("to_utc_timestamp with literal zone")
.exclude("to_utc_timestamp with column zone")
- .exclude("from_utc_timestamp with literal zone")
.exclude("from_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 7a3877427..07af1fa84 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -324,9 +324,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and
minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
- .exclude("to_utc_timestamp with literal zone")
.exclude("to_utc_timestamp with column zone")
- .exclude("from_utc_timestamp with literal zone")
.exclude("from_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 7a3877427..07af1fa84 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -324,9 +324,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-30793: truncate timestamps before the epoch to seconds and
minutes")
.excludeGlutenTest("unix_timestamp")
.excludeGlutenTest("to_unix_timestamp")
- .exclude("to_utc_timestamp with literal zone")
.exclude("to_utc_timestamp with column zone")
- .exclude("from_utc_timestamp with literal zone")
.exclude("from_utc_timestamp with column zone")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]