This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new b37a6e426 [GLUTEN-5827][CH]support utc timestamp transfrom (#5828)
b37a6e426 is described below

commit b37a6e42681fa23b94337e7646153e5640e474d7
Author: KevinyhZou <[email protected]>
AuthorDate: Tue Jun 11 16:56:32 2024 +0800

    [GLUTEN-5827][CH]support utc timestamp transfrom (#5828)
    
    What changes were proposed in this pull request?
    (Please fill in changes proposed in this fix)
    
    (Fixes: #5827)
    
    Support the to_utc_timestamp/from_utc_timestamp functions;
    convert time zones like '+08:00' / '-08:00' to GMT+8 / GMT-8 when such a
    value is set in spark.sql.session.timezone or passed as the time zone
    parameter of to_utc_timestamp / from_utc_timestamp.
    How was this patch tested?
    spark ut
---
 .../org/apache/gluten/utils/CHExpressionUtil.scala | 13 ++++-
 cpp-ch/local-engine/Common/CHUtil.cpp              | 18 ++++---
 cpp-ch/local-engine/Common/CHUtil.h                |  1 +
 cpp-ch/local-engine/Parser/SerializedPlanParser.h  |  2 +-
 .../scalar_function_parser/fromUtcTimestamp.cpp    | 35 +++++++++++++
 .../scalar_function_parser/toUtcTimestamp.cpp      | 35 +++++++++++++
 .../scalar_function_parser/utcTimestampTransform.h | 61 ++++++++++++++++++++++
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  2 -
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  2 -
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  2 -
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  2 -
 11 files changed, 156 insertions(+), 17 deletions(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index 94d289594..d47523c0f 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -44,6 +44,15 @@ case class SequenceValidator() extends FunctionValidator {
   }
 }
 
+case class UtcTimestampValidator() extends FunctionValidator {
+  override def doValidate(expr: Expression): Boolean = expr match {
+    // CH backend does not support a non-const timezone parameter: accept only a Literal 2nd child
+    case t: ToUTCTimestamp => t.children(1).isInstanceOf[Literal]
+    case f: FromUTCTimestamp => f.children(1).isInstanceOf[Literal]
+    case _ => false
+  }
+}
+
 case class UnixTimeStampValidator() extends FunctionValidator {
   final val DATE_TYPE = "date"
 
@@ -194,8 +203,8 @@ object CHExpressionUtil {
     REGR_SLOPE -> DefaultValidator(),
     REGR_INTERCEPT -> DefaultValidator(),
     REGR_SXY -> DefaultValidator(),
-    TO_UTC_TIMESTAMP -> DefaultValidator(),
-    FROM_UTC_TIMESTAMP -> DefaultValidator(),
+    TO_UTC_TIMESTAMP -> UtcTimestampValidator(),
+    FROM_UTC_TIMESTAMP -> UtcTimestampValidator(),
     UNIX_MILLIS -> DefaultValidator(),
     UNIX_MICROS -> DefaultValidator(),
     TIMESTAMP_MILLIS -> DefaultValidator(),
diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp 
b/cpp-ch/local-engine/Common/CHUtil.cpp
index 62b42f981..fa6124cf0 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -586,7 +586,7 @@ void 
BackendInitializerUtil::initEnvs(DB::Context::ConfigurationPtr config)
     if (config->has("timezone"))
     {
         const std::string config_timezone = config->getString("timezone");
-        const String mapped_timezone = 
DateLUT::mappingForJavaTimezone(config_timezone);
+        const String mapped_timezone = 
DateTimeUtil::convertTimeZone(config_timezone);
         if (0 != setenv("TZ", mapped_timezone.data(), 1)) // 
NOLINT(concurrency-mt-unsafe) // ok if not called concurrently with other 
setenv/getenv
             throw Poco::Exception("Cannot setenv TZ variable");
 
@@ -659,11 +659,7 @@ void 
BackendInitializerUtil::initSettings(std::map<std::string, std::string> & b
         }
         else if (key == SPARK_SESSION_TIME_ZONE)
         {
-            String time_zone_val = value;
-            /// Convert timezone ID like '+8:00' to GMT+8:00
-            if (value.starts_with("+") || value.starts_with("-"))
-                time_zone_val = "GMT" + value;
-            time_zone_val = DateLUT::mappingForJavaTimezone(time_zone_val);
+            String time_zone_val = DateTimeUtil::convertTimeZone(value);
             settings.set("session_timezone", time_zone_val);
             LOG_DEBUG(&Poco::Logger::get("CHUtil"), "Set settings key:{} 
value:{}", "session_timezone", time_zone_val);
         }
@@ -937,6 +933,16 @@ Int64 DateTimeUtil::currentTimeMillis()
     return timeInMilliseconds(std::chrono::system_clock::now());
 }
 
+String DateTimeUtil::convertTimeZone(const String & time_zone)
+{
+    String res = time_zone;
+    /// Prefix an offset-style ID such as '+08:00' or '-08:00' with "GMT" (giving 'GMT+08:00' / 'GMT-08:00')
+    if (time_zone.starts_with("+") || time_zone.starts_with("-"))
+        res = "GMT" + time_zone;
+    res = DateLUT::mappingForJavaTimezone(res);
+    return res;
+}
+
 UInt64 MemoryUtil::getCurrentMemoryUsage(size_t depth)
 {
     Int64 current_memory_usage = 0;
diff --git a/cpp-ch/local-engine/Common/CHUtil.h 
b/cpp-ch/local-engine/Common/CHUtil.h
index 2ef3c6ef9..50de9461f 100644
--- a/cpp-ch/local-engine/Common/CHUtil.h
+++ b/cpp-ch/local-engine/Common/CHUtil.h
@@ -225,6 +225,7 @@ class DateTimeUtil
 {
 public:
     static Int64 currentTimeMillis();
+    static String convertTimeZone(const String & time_zone);
 };
 
 class MemoryUtil
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h 
b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
index d0a16ec71..c79598c59 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
@@ -57,7 +57,7 @@ static const std::map<std::string, std::string> 
SCALAR_FUNCTIONS
        {"get_timestamp", "parseDateTimeInJodaSyntaxOrNull"}, // for spark 
function: to_date/to_timestamp
        {"quarter", "toQuarter"},
        {"to_unix_timestamp", "parseDateTimeInJodaSyntaxOrNull"},
-       //    {"unix_timestamp", "toUnixTimestamp"},
+       //{"unix_timestamp", "toUnixTimestamp"},
        {"date_format", "formatDateTimeInJodaSyntax"},
        {"timestamp_add", "timestamp_add"},
 
diff --git 
a/cpp-ch/local-engine/Parser/scalar_function_parser/fromUtcTimestamp.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/fromUtcTimestamp.cpp
new file mode 100644
index 000000000..8d2323105
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/fromUtcTimestamp.cpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Parser/scalar_function_parser/utcTimestampTransform.h>
+
+namespace local_engine
+{
+
+class FunctionParserFromUtcTimestamp : public 
FunctionParserUtcTimestampTransform
+{
+public:
+    explicit FunctionParserFromUtcTimestamp(SerializedPlanParser * 
plan_parser_) : FunctionParserUtcTimestampTransform(plan_parser_) { }
+    ~FunctionParserFromUtcTimestamp() = default;
+
+    static constexpr auto name = "from_utc_timestamp";
+    String getCHFunctionName(const substrait::Expression_ScalarFunction &) 
const override { return "from_utc_timestamp"; }
+    String getName() const override { return "from_utc_timestamp"; }
+};
+
+static FunctionParserRegister<FunctionParserFromUtcTimestamp> fromUtcTimestamp;
+}
diff --git 
a/cpp-ch/local-engine/Parser/scalar_function_parser/toUtcTimestamp.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/toUtcTimestamp.cpp
new file mode 100644
index 000000000..4b04942ba
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/toUtcTimestamp.cpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Parser/scalar_function_parser/utcTimestampTransform.h>
+
+namespace local_engine
+{
+
+class FunctionParserToUtcTimestamp : public FunctionParserUtcTimestampTransform
+{
+public:
+    explicit FunctionParserToUtcTimestamp(SerializedPlanParser * plan_parser_) 
: FunctionParserUtcTimestampTransform(plan_parser_) { }
+    ~FunctionParserToUtcTimestamp() = default;
+
+    static constexpr auto name = "to_utc_timestamp";
+    String getCHFunctionName(const substrait::Expression_ScalarFunction &) 
const override { return "to_utc_timestamp"; }
+    String getName() const override { return "to_utc_timestamp"; }
+};
+
+static FunctionParserRegister<FunctionParserToUtcTimestamp> toUtcTimestamp;
+}
diff --git 
a/cpp-ch/local-engine/Parser/scalar_function_parser/utcTimestampTransform.h 
b/cpp-ch/local-engine/Parser/scalar_function_parser/utcTimestampTransform.h
new file mode 100644
index 000000000..87ea19024
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/utcTimestampTransform.h
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Parser/FunctionParser.h>
+#include <Common/CHUtil.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+}
+
+namespace local_engine
+{
+
+class FunctionParserUtcTimestampTransform : public FunctionParser
+{
+public:
+    explicit FunctionParserUtcTimestampTransform(SerializedPlanParser * 
plan_parser_) : FunctionParser(plan_parser_) { }
+    ~FunctionParserUtcTimestampTransform() override = default;
+
+    const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction 
& substrait_func, ActionsDAGPtr & actions_dag) const override
+    {
+        /// Convert the timezone value to one the ClickHouse backend supports, e.g. GMT+8 
-> Etc/GMT-8, +08:00 -> Etc/GMT-8
+        if (substrait_func.arguments_size() != 2)
+            throw 
DB::Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s 
must have 2 arguments", getName());
+        
+        const substrait::Expression & arg1 = 
substrait_func.arguments()[1].value();
+        if (!arg1.has_literal() || !arg1.literal().has_string())
+            throw DB::Exception(DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,  
"Function {}'s 2nd argument should be string literal", getName());
+        
+        const String & arg1_literal = arg1.literal().string();
+        String time_zone_val = DateTimeUtil::convertTimeZone(arg1_literal); // normalized zone ID, passed below as the 2nd function argument
+        auto parsed_args = parseFunctionArguments(substrait_func, "", 
actions_dag);
+        auto nullable_string_type = 
DB::makeNullable(std::make_shared<DB::DataTypeString>());
+        const auto * time_zone_node = addColumnToActionsDAG(actions_dag, 
nullable_string_type, time_zone_val);
+        const auto * result_node = toFunctionNode(actions_dag, 
getCHFunctionName(substrait_func), {parsed_args[0], time_zone_node});
+        return convertNodeTypeIfNeeded(substrait_func, result_node, 
actions_dag);
+    }
+};
+}
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 2c34baa63..a8a5a1e41 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -300,9 +300,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and 
minutes")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
-    .exclude("to_utc_timestamp with literal zone")
     .exclude("to_utc_timestamp with column zone")
-    .exclude("from_utc_timestamp with literal zone")
     .exclude("from_utc_timestamp with column zone")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index bb782fde3..cb33f0025 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -326,9 +326,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and 
minutes")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
-    .exclude("to_utc_timestamp with literal zone")
     .exclude("to_utc_timestamp with column zone")
-    .exclude("from_utc_timestamp with literal zone")
     .exclude("from_utc_timestamp with column zone")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 7a3877427..07af1fa84 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -324,9 +324,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and 
minutes")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
-    .exclude("to_utc_timestamp with literal zone")
     .exclude("to_utc_timestamp with column zone")
-    .exclude("from_utc_timestamp with literal zone")
     .exclude("from_utc_timestamp with column zone")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 7a3877427..07af1fa84 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -324,9 +324,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-30793: truncate timestamps before the epoch to seconds and 
minutes")
     .excludeGlutenTest("unix_timestamp")
     .excludeGlutenTest("to_unix_timestamp")
-    .exclude("to_utc_timestamp with literal zone")
     .exclude("to_utc_timestamp with column zone")
-    .exclude("from_utc_timestamp with literal zone")
     .exclude("from_utc_timestamp with column zone")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff].excludeGlutenTest(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to