This is an automated email from the ASF dual-hosted git repository.
liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 0f19a8632 [GLUTEN-4898][CH]Bug fix to date diff (#4900)
0f19a8632 is described below
commit 0f19a86320e8d6eea670951113e997dfd4281a64
Author: KevinyhZou <[email protected]>
AuthorDate: Mon Mar 11 18:35:00 2024 +0800
[GLUTEN-4898][CH]Bug fix to date diff (#4900)
What changes were proposed in this pull request?
(Please fill in changes proposed in this fix)
(Fixes: #4898)
Fix the result diff of the to_date function;
Fix the exception thrown by parseDateTimeBestEffort in
SparkFunctionToDateTime;
Simplify the substring function code.
How was this patch tested?
Tested by unit tests (UT).
---
.../GlutenClickHouseTPCHParquetSuite.scala | 5 +++-
.../local-engine/Functions/SparkFunctionToDate.cpp | 30 +++++++++++-----------
.../Functions/SparkFunctionToDateTime.h | 4 +--
.../Parser/scalar_function_parser/substring.cpp | 5 ++--
4 files changed, 23 insertions(+), 21 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
index 1c44c2de0..9ffde6b5c 100644
---
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
+++
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
@@ -2103,7 +2103,10 @@ class GlutenClickHouseTPCHParquetSuite extends
GlutenClickHouseTPCHAbstractSuite
|(4, '2023-09-02 00:00:01.333-11'),
|(5, ' 2023-09-02 agdfegfew'),
|(6, 'afe2023-09-02 11:22:33'),
- |(7, '1970-01-01 00:00:00')
+ |(7, '1970-01-01 00:00:00'),
+ |(8, '2024-3-2'),
+ |(9, '2024-03-2'),
+ |(10, '2024-03')
|""".stripMargin
spark.sql(create_table_sql)
spark.sql(insert_data_sql)
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
b/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
index cef3d1452..0b963e769 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
+++ b/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
@@ -22,6 +22,7 @@
#include <Functions/FunctionFactory.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/ReadHelpers.h>
+#include <IO/parseDateTimeBestEffort.h>
namespace DB
{
@@ -50,7 +51,9 @@ public:
for (size_t i = start; i < start + length; ++i)
{
if (!isNumericASCII(*(rb.position() + i)))
+ {
return false;
+ }
}
return true;
};
@@ -63,7 +66,7 @@ public:
};
if (!checkNumbericASCII(buf, 0, 4)
|| !checkDelimiter(buf, 4)
- || !checkNumbericASCII(buf, 5, 2)
+ || !checkNumbericASCII(buf, 5, 2)
|| !checkDelimiter(buf, 7)
|| !checkNumbericASCII(buf, 8, 2))
return false;
@@ -112,19 +115,19 @@ public:
throw DB::Exception(DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {}'s return type must be date32.", name);
using ColVecTo = DB::DataTypeDate32::ColumnType;
- typename ColVecTo::MutablePtr result_column = ColVecTo::create(size);
+ typename ColVecTo::MutablePtr result_column = ColVecTo::create(size,
0);
typename ColVecTo::Container & result_container =
result_column->getData();
- DB::ColumnUInt8::MutablePtr null_map = DB::ColumnUInt8::create(size);
+ DB::ColumnUInt8::MutablePtr null_map = DB::ColumnUInt8::create(size,
0);
typename DB::ColumnUInt8::Container & null_container =
null_map->getData();
- const DateLUTImpl * time_zone = &DateLUT::instance();
+ const DateLUTImpl * local_time_zone = &DateLUT::instance();
+ const DateLUTImpl * utc_time_zone = &DateLUT::instance("UTC");
for (size_t i = 0; i < size; ++i)
{
auto str = src_col->getDataAt(i);
- if (str.size < 10)
+ if (str.size < 4)
{
null_container[i] = true;
- result_container[i] = 0;
continue;
}
else
@@ -134,20 +137,17 @@ public:
{
buf.position() ++;
}
- if(buf.buffer().end() - buf.position() < 10)
+ if(buf.buffer().end() - buf.position() < 4)
{
null_container[i] = true;
- result_container[i] = 0;
continue;
}
- if (!checkAndGetDate32(buf, result_container[i], *time_zone))
- {
- null_container[i] = true;
- result_container[i] = 0;
- }
- else
+ if (!checkAndGetDate32(buf, result_container[i],
*local_time_zone))
{
- null_container[i] = false;
+ time_t tmp = 0;
+ bool parsed = tryParseDateTimeBestEffort(tmp, buf,
*local_time_zone, *utc_time_zone);
+ result_container[i] =
local_time_zone->toDayNum<time_t>(tmp);
+ null_container[i] = !parsed;
}
}
}
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionToDateTime.h
b/cpp-ch/local-engine/Functions/SparkFunctionToDateTime.h
index 760a4da75..d185b850f 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionToDateTime.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionToDateTime.h
@@ -163,8 +163,8 @@ public:
}
else
{
- parseDateTime64BestEffort(dst_data[i], scale, buf,
*local_time_zone, *utc_time_zone);
- null_map_data[i] = 0;
+ bool parsed = tryParseDateTime64BestEffort(dst_data[i], scale,
buf, *local_time_zone, *utc_time_zone);
+ null_map_data[i] = !parsed;
}
}
}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
index f4c21f4ad..2a44c3e38 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
@@ -51,15 +51,14 @@ public:
/**
parse substring(str, start_index, length) as
if (start_index == 0)
- substring(str, start_index+1, length)
+ substring(str, 1, length)
else
substring(str, start_index, length)
*/
auto * const_zero_node = addColumnToActionsDAG(actions_dag,
start_index_data_type, Field(0));
auto * const_one_node = addColumnToActionsDAG(actions_dag,
start_index_data_type, Field(1));
auto * equals_zero_node = toFunctionNode(actions_dag, "equals",
{parsed_args[1], const_zero_node});
- auto * index_plus_node = toFunctionNode(actions_dag, "plus",
{parsed_args[1], const_one_node});
- auto * if_node = toFunctionNode(actions_dag, "if", {equals_zero_node,
index_plus_node, parsed_args[1]});
+ auto * if_node = toFunctionNode(actions_dag, "if", {equals_zero_node,
const_one_node, parsed_args[1]});
const DB::ActionsDAG::Node * substring_func_node;
if (parsed_args.size() == 2)
substring_func_node = toFunctionNode(actions_dag, "substringUTF8",
{parsed_args[0], if_node});
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]