This is an automated email from the ASF dual-hosted git repository.

lgbo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 9acc625a52 [GLUTEN-10980][CH]Fix unix_timestamp diff when format is 
not `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss` (#10981)
9acc625a52 is described below

commit 9acc625a52dedb2d8e8aa8eb56d938d66d325033
Author: kevinyhzou <[email protected]>
AuthorDate: Fri Oct 31 14:21:25 2025 +0800

    [GLUTEN-10980][CH]Fix unix_timestamp diff when format is not `yyyy-MM-dd` 
or `yyyy-MM-dd HH:mm:ss` (#10981)
    
    * Fix unix_timestamp
    
    * rename to checkFormat
    
    * Fix reviews
---
 .../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 24 ++++++++++++++++--
 .../Parser/scalar_function_parser/getTimestamp.h   | 29 ++++++++++++++++++++--
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index c8d6da2b66..2925d883cc 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -2321,13 +2321,26 @@ class GlutenClickHouseTPCHSaltNullParquetSuite
   test("GLUTEN-4085: Fix unix_timestamp/to_unix_timestamp") {
     val tbl_create_sql = "create table test_tbl_4085(id bigint, data string) 
using parquet"
     val data_insert_sql =
-      "insert into test_tbl_4085 values(1, '2023-12-18'),(2, '2023-12-19'), 
(3, '2023-12-20')"
+      "insert into test_tbl_4085 values(1, '2023-12-18'),(2, '2023-12-19'), 
(3, '2023-12-20'),  (4, '2024-10-15 07:35:26.486')"
     val select_sql =
       "select id, unix_timestamp(to_date(data), 'yyyy-MM-dd') from 
test_tbl_4085"
     val select_sql_1 = "select id, to_unix_timestamp(to_date(data)) from 
test_tbl_4085"
     val select_sql_2 = "select id, to_unix_timestamp(to_timestamp(data)) from 
test_tbl_4085"
     val select_sql_3 =
-      "select id, unix_timestamp('2024-10-15 07:35:26.486', 'yyyy-MM-dd 
HH:mm:ss') from test_tbl_4085"
+      "select id, unix_timestamp(data, 'yyyy-MM-dd HH:mm:ss') from 
test_tbl_4085 where id = 4"
+    val select_sql_4 = "select id, unix_timestamp(data, 'yyyy-M-d') from 
test_tbl_4085 where id = 1"
+    val select_sql_5 =
+      "select id, unix_timestamp(data, 'yyyy-MM-dd HH') from test_tbl_4085 
where id = 4"
+    val select_sql_6 =
+      "select id, unix_timestamp(data, 'yyyy-MM-dd HH:mm') from test_tbl_4085 
where id = 4"
+    val select_sql_7 =
+      "select id, unix_timestamp(data, 'yyyy-MM-dd') from test_tbl_4085 where 
id = 4"
+    val select_sql_8 =
+      "select id, unix_timestamp(data, 'yyyy-MM-dd HH:mm:ss.SSS') from 
test_tbl_4085 where id = 4"
+    val select_sql_9 =
+      "select id, unix_timestamp(data, 'yyyy-MM-dd HH:mm:ss.S') from 
test_tbl_4085 where id = 4"
+    val select_sql_10 =
+      "select id, unix_timestamp(data, 'yyyy-MM-dd HH:mm:ss.') from 
test_tbl_4085 where id = 4"
     spark.sql(tbl_create_sql)
     spark.sql(data_insert_sql)
     compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
@@ -2335,6 +2348,13 @@ class GlutenClickHouseTPCHSaltNullParquetSuite
     compareResultsAgainstVanillaSpark(select_sql_2, true, { _ => })
     withSQLConf("spark.sql.legacy.timeParserPolicy" -> "LEGACY") {
       compareResultsAgainstVanillaSpark(select_sql_3, true, { _ => })
+      compareResultsAgainstVanillaSpark(select_sql_4, true, { _ => })
+      compareResultsAgainstVanillaSpark(select_sql_5, true, { _ => })
+      compareResultsAgainstVanillaSpark(select_sql_6, true, { _ => })
+      compareResultsAgainstVanillaSpark(select_sql_7, true, { _ => })
+      compareResultsAgainstVanillaSpark(select_sql_8, true, { _ => })
+      compareResultsAgainstVanillaSpark(select_sql_9, true, { _ => })
+      compareResultsAgainstVanillaSpark(select_sql_10, true, { _ => })
     }
     spark.sql("drop table test_tbl_4085")
   }
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/getTimestamp.h 
b/cpp-ch/local-engine/Parser/scalar_function_parser/getTimestamp.h
index 5a90ba0cd7..cbe83e5acc 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/getTimestamp.h
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/getTimestamp.h
@@ -23,6 +23,7 @@
 #include <Parser/FunctionParser.h>
 #include <boost/algorithm/string/case_conv.hpp>
 #include <Common/CHUtil.h>
+#include <regex>
 
 namespace DB
 {
@@ -78,7 +79,7 @@ public:
         UInt32 s_count = std::count(fmt.begin(), fmt.end(), 'S');
         String time_parser_policy = 
getContext()->getSettingsRef().has(TIMER_PARSER_POLICY) ? 
toString(getContext()->getSettingsRef().get(TIMER_PARSER_POLICY)) : "";
         boost::to_lower(time_parser_policy);
-        if (time_parser_policy == "legacy")
+        if (time_parser_policy == "legacy" && checkFormat(fmt))
         {
             if (s_count == 0)
             {
@@ -93,7 +94,6 @@ public:
                 fmt += String(3 - s_count, 'S');
             else
                 fmt = fmt.substr(0, fmt.size() - (s_count - 3));
-
             const auto * fmt_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DB::DataTypeString>(), fmt);
             const auto * result_node = toFunctionNode(actions_dag, 
"parseDateTime64InJodaSyntaxOrNull", {expr_arg, fmt_node});
             return convertNodeTypeIfNeeded(substrait_func, result_node, 
actions_dag);
@@ -104,5 +104,30 @@ public:
             return convertNodeTypeIfNeeded(substrait_func, result_node, 
actions_dag);
         }
     }
+
+private:
+    bool checkFormat(const String& fmt) const
+    {
+        if (fmt.size() < 10)
+        {
+            return false;
+        }
+        else
+        {
+            /**
+             * Purpose: report whether `fmt` is one of the date/time layouts
+             * described by the regex below. Returns true only when the whole
+             * string matches the pattern, false otherwise.
+             *
+             * Formats shorter than 10 characters (the length of "yyyy-MM-dd")
+             * never reach this point; the size check above rejects them
+             * without running the regex.
+             *
+             * The following formats are matched ('-' or '/' between the date
+             * fields, ' ' or 'T' between the date and the time part):
+             * yyyy-MM-dd, yyyy/MM/dd
+             * yyyy-MM-dd HH, yyyy/MM/dd HH, yyyy-MM-ddTHH, yyyy/MM/ddTHH
+             * yyyy-MM-dd HH:mm, yyyy/MM/dd HH:mm, yyyy-MM-ddTHH:mm,
+             *   yyyy/MM/ddTHH:mm
+             * yyyy-MM-dd HH:mm:ss, yyyy/MM/dd HH:mm:ss, yyyy-MM-ddTHH:mm:ss,
+             *   yyyy/MM/ddTHH:mm:ss
+             * yyyy-MM-dd HH:mm:ss., yyyy/MM/dd HH:mm:ss.,
+             *   yyyy-MM-ddTHH:mm:ss., yyyy/MM/ddTHH:mm:ss.
+             * yyyy-MM-dd HH:mm:ss.S, yyyy/MM/dd HH:mm:ss.S,
+             *   yyyy-MM-ddTHH:mm:ss.S, yyyy/MM/ddTHH:mm:ss.S
+             * yyyy-MM-dd HH:mm:ss.SS, yyyy/MM/dd HH:mm:ss.SS,
+             *   yyyy-MM-ddTHH:mm:ss.SS, yyyy/MM/ddTHH:mm:ss.SS
+             * yyyy-MM-dd HH:mm:ss.SSS, yyyy/MM/dd HH:mm:ss.SSS,
+             *   yyyy-MM-ddTHH:mm:ss.SSS, yyyy/MM/ddTHH:mm:ss.SSS
+             *
+             * NOTE(review): the two date separators are chosen independently
+             * and the group after the ' '/'T' separator is optional, so the
+             * pattern also accepts e.g. "yyyy-MM/dd", "yyyy-MM-dd " and
+             * "yyyy-MM-ddT" - confirm that this is intended.
+             */
+            std::regex fmtPattern(R"(^yyyy[-/]MM[-/]dd([ 
T](HH(:mm(:ss(\.([S]{1,3})?)?)?)?)?)?$)");
+            return std::regex_match(fmt, fmtPattern);
+        }
+    }
 };
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to