This is an automated email from the ASF dual-hosted git repository.
liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 33f1c905f [GLUTEN-4997][CH]Fix year diff (#5079)
33f1c905f is described below
commit 33f1c905f33f5298c8523081fc9319529db02b47
Author: KevinyhZou <[email protected]>
AuthorDate: Fri Mar 22 17:46:39 2024 +0800
[GLUTEN-4997][CH]Fix year diff (#5079)
What changes were proposed in this pull request?
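In SparkFunctionToDate.cpp, checkAndGetDate32 now reports through a new can_be_parsed out-parameter whether the leading four-character year check passed. When the strict date format check fails and the year check also failed, the row is set to NULL instead of falling back to tryParseDateTimeBestEffort.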
(Fixes: #4997)
How was this patch tested?
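Tested by unit test: the new "GLUTEN-4997: Bug fix year diff" case in GlutenClickHouseTPCHSaltNullParquetSuite, which compares the results against vanilla Spark.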
---
.../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 12 ++++++++++
.../local-engine/Functions/SparkFunctionToDate.cpp | 26 +++++++++++++++-------
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 0a71d2927..957cf99e6 100644
---
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -2452,5 +2452,17 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends
GlutenClickHouseTPCHAbstr
compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
spark.sql("drop table test_tbl_4279")
}
+
+ test("GLUTEN-4997: Bug fix year diff") {
+ val tbl_create_sql = "create table test_tbl_4997(id bigint, data string)
using parquet"
+ val tbl_insert_sql =
+ "insert into test_tbl_4997 values(1, '2024-01-03'), (2, '2024'), (3,
'2024-'), (4, '2024-1')," +
+ "(5, '2024-1-'), (6, '2024-1-3'), (7, '2024-1-3T'), (8, '21-0'), (9,
'12-9')";
+ val select_sql = "select id, year(data) from test_tbl_4997 order by id"
+ spark.sql(tbl_create_sql)
+ spark.sql(tbl_insert_sql)
+ compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
+ spark.sql("drop table test_tbl_4997")
+ }
}
// scalastyle:on line.size.limit
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
b/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
index 3a25e383d..1c5d68fcd 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
+++ b/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
@@ -53,7 +53,7 @@ public:
bool isVariadic() const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
- bool checkAndGetDate32(DB::ReadBuffer & buf, DB::DataTypeDate32::FieldType
&x, const DateLUTImpl & date_lut) const
+ bool checkAndGetDate32(DB::ReadBuffer & buf, DB::DataTypeDate32::FieldType
&x, const DateLUTImpl & date_lut, UInt8 & can_be_parsed) const
{
auto checkNumbericASCII = [&](DB::ReadBuffer & rb, size_t start,
size_t length) -> bool
{
@@ -73,12 +73,16 @@ public:
else
return true;
};
- if (!checkNumbericASCII(buf, 0, 4)
- || !checkDelimiter(buf, 4)
+ bool yearIsNumberic = checkNumbericASCII(buf, 0, 4);
+ if (!yearIsNumberic
+ || !checkDelimiter(buf, 4)
|| !checkNumbericASCII(buf, 5, 2)
|| !checkDelimiter(buf, 7)
|| !checkNumbericASCII(buf, 8, 2))
+ {
+ can_be_parsed = yearIsNumberic;
return false;
+ }
else
{
UInt8 month = (*(buf.position() + 5) - '0') * 10 +
(*(buf.position() + 6) - '0');
@@ -157,12 +161,18 @@ public:
null_container[i] = true;
continue;
}
- if (!checkAndGetDate32(buf, result_container[i],
*local_time_zone))
+ UInt8 can_be_parsed = 1;
+ if (!checkAndGetDate32(buf, result_container[i],
*local_time_zone, can_be_parsed))
{
- time_t tmp = 0;
- bool parsed = tryParseDateTimeBestEffort(tmp, buf,
*local_time_zone, *utc_time_zone);
- result_container[i] =
local_time_zone->toDayNum<time_t>(tmp);
- null_container[i] = !parsed;
+ if (!can_be_parsed)
+ null_container[i] = true;
+ else
+ {
+ time_t tmp = 0;
+ bool parsed = tryParseDateTimeBestEffort(tmp, buf,
*local_time_zone, *utc_time_zone);
+ result_container[i] =
local_time_zone->toDayNum<time_t>(tmp);
+ null_container[i] = !parsed;
+ }
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]