This is an automated email from the ASF dual-hosted git repository.
lgbo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 9acc625a52 [GLUTEN-10980][CH]Fix unix_timestamp diff when format is
not `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss` (#10981)
9acc625a52 is described below
commit 9acc625a52dedb2d8e8aa8eb56d938d66d325033
Author: kevinyhzou <[email protected]>
AuthorDate: Fri Oct 31 14:21:25 2025 +0800
[GLUTEN-10980][CH]Fix unix_timestamp diff when format is not `yyyy-MM-dd`
or `yyyy-MM-dd HH:mm:ss` (#10981)
* Fix unix_timestamp
* rename to checkFormat
* Fix reviews
---
.../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 24 ++++++++++++++++--
.../Parser/scalar_function_parser/getTimestamp.h | 29 ++++++++++++++++++++--
2 files changed, 49 insertions(+), 4 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index c8d6da2b66..2925d883cc 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -2321,13 +2321,26 @@ class GlutenClickHouseTPCHSaltNullParquetSuite
test("GLUTEN-4085: Fix unix_timestamp/to_unix_timestamp") {
val tbl_create_sql = "create table test_tbl_4085(id bigint, data string)
using parquet"
val data_insert_sql =
- "insert into test_tbl_4085 values(1, '2023-12-18'),(2, '2023-12-19'),
(3, '2023-12-20')"
+ "insert into test_tbl_4085 values(1, '2023-12-18'),(2, '2023-12-19'),
(3, '2023-12-20'), (4, '2024-10-15 07:35:26.486')"
val select_sql =
"select id, unix_timestamp(to_date(data), 'yyyy-MM-dd') from
test_tbl_4085"
val select_sql_1 = "select id, to_unix_timestamp(to_date(data)) from
test_tbl_4085"
val select_sql_2 = "select id, to_unix_timestamp(to_timestamp(data)) from
test_tbl_4085"
val select_sql_3 =
- "select id, unix_timestamp('2024-10-15 07:35:26.486', 'yyyy-MM-dd
HH:mm:ss') from test_tbl_4085"
+ "select id, unix_timestamp(data, 'yyyy-MM-dd HH:mm:ss') from
test_tbl_4085 where id = 4"
+ val select_sql_4 = "select id, unix_timestamp(data, 'yyyy-M-d') from
test_tbl_4085 where id = 1"
+ val select_sql_5 =
+ "select id, unix_timestamp(data, 'yyyy-MM-dd HH') from test_tbl_4085
where id = 4"
+ val select_sql_6 =
+ "select id, unix_timestamp(data, 'yyyy-MM-dd HH:mm') from test_tbl_4085
where id = 4"
+ val select_sql_7 =
+ "select id, unix_timestamp(data, 'yyyy-MM-dd') from test_tbl_4085 where
id = 4"
+ val select_sql_8 =
+ "select id, unix_timestamp(data, 'yyyy-MM-dd HH:mm:ss.SSS') from
test_tbl_4085 where id = 4"
+ val select_sql_9 =
+ "select id, unix_timestamp(data, 'yyyy-MM-dd HH:mm:ss.S') from
test_tbl_4085 where id = 4"
+ val select_sql_10 =
+ "select id, unix_timestamp(data, 'yyyy-MM-dd HH:mm:ss.') from
test_tbl_4085 where id = 4"
spark.sql(tbl_create_sql)
spark.sql(data_insert_sql)
compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
@@ -2335,6 +2348,13 @@ class GlutenClickHouseTPCHSaltNullParquetSuite
compareResultsAgainstVanillaSpark(select_sql_2, true, { _ => })
withSQLConf("spark.sql.legacy.timeParserPolicy" -> "LEGACY") {
compareResultsAgainstVanillaSpark(select_sql_3, true, { _ => })
+ compareResultsAgainstVanillaSpark(select_sql_4, true, { _ => })
+ compareResultsAgainstVanillaSpark(select_sql_5, true, { _ => })
+ compareResultsAgainstVanillaSpark(select_sql_6, true, { _ => })
+ compareResultsAgainstVanillaSpark(select_sql_7, true, { _ => })
+ compareResultsAgainstVanillaSpark(select_sql_8, true, { _ => })
+ compareResultsAgainstVanillaSpark(select_sql_9, true, { _ => })
+ compareResultsAgainstVanillaSpark(select_sql_10, true, { _ => })
}
spark.sql("drop table test_tbl_4085")
}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/getTimestamp.h
b/cpp-ch/local-engine/Parser/scalar_function_parser/getTimestamp.h
index 5a90ba0cd7..cbe83e5acc 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/getTimestamp.h
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/getTimestamp.h
@@ -23,6 +23,7 @@
#include <Parser/FunctionParser.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <Common/CHUtil.h>
+#include <regex>
namespace DB
{
@@ -78,7 +79,7 @@ public:
UInt32 s_count = std::count(fmt.begin(), fmt.end(), 'S');
String time_parser_policy =
getContext()->getSettingsRef().has(TIMER_PARSER_POLICY) ?
toString(getContext()->getSettingsRef().get(TIMER_PARSER_POLICY)) : "";
boost::to_lower(time_parser_policy);
- if (time_parser_policy == "legacy")
+ if (time_parser_policy == "legacy" && checkFormat(fmt))
{
if (s_count == 0)
{
@@ -93,7 +94,6 @@ public:
fmt += String(3 - s_count, 'S');
else
fmt = fmt.substr(0, fmt.size() - (s_count - 3));
-
const auto * fmt_node = addColumnToActionsDAG(actions_dag,
std::make_shared<DB::DataTypeString>(), fmt);
const auto * result_node = toFunctionNode(actions_dag,
"parseDateTime64InJodaSyntaxOrNull", {expr_arg, fmt_node});
return convertNodeTypeIfNeeded(substrait_func, result_node,
actions_dag);
@@ -104,5 +104,30 @@ public:
return convertNodeTypeIfNeeded(substrait_func, result_node,
actions_dag);
}
}
+
+private:
+    /// Returns true when `fmt` is a `yyyy-MM-dd`-style pattern, optionally followed by a time part.
+    /// The result gates the "legacy" time-parser-policy branch in the caller above.
+    ///
+    /// Accepted shapes (date separator is '-' or '/', the date/time separator is ' ' or 'T'):
+    ///   yyyy-MM-dd
+    ///   yyyy-MM-dd HH
+    ///   yyyy-MM-dd HH:mm
+    ///   yyyy-MM-dd HH:mm:ss
+    ///   yyyy-MM-dd HH:mm:ss.       (trailing dot, no fraction letters)
+    ///   yyyy-MM-dd HH:mm:ss.S, yyyy-MM-dd HH:mm:ss.SS, yyyy-MM-dd HH:mm:ss.SSS
+    ///
+    /// NOTE(review): because every group after the separator is optional, a bare trailing
+    /// separator ("yyyy-MM-dd " / "yyyy-MM-ddT") and mixed date separators ("yyyy-MM/dd")
+    /// also match. Confirm whether that is intended.
+    ///
+    /// @param fmt  Format string to validate.
+    /// @return     true if the whole string matches one of the shapes above, false otherwise.
+    bool checkFormat(const String& fmt) const
+    {
+        // The shortest accepted pattern is the 10-character date-only form, so anything
+        // shorter can be rejected without running the regex.
+        if (fmt.size() < 10)
+            return false;
+
+        // Built once and reused across calls: constructing a std::regex is costly, and the
+        // pattern is a constant. The literal must stay on a single line; inside a raw string
+        // a line break would become part of the `[ T]` character class.
+        static const std::regex fmt_pattern(R"(^yyyy[-/]MM[-/]dd([ T](HH(:mm(:ss(\.([S]{1,3})?)?)?)?)?)?$)");
+        return std::regex_match(fmt, fmt_pattern);
+    }
};
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]