This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 33f1c905f [GLUTEN-4997][CH]Fix year diff (#5079)
33f1c905f is described below

commit 33f1c905f33f5298c8523081fc9319529db02b47
Author: KevinyhZou <[email protected]>
AuthorDate: Fri Mar 22 17:46:39 2024 +0800

    [GLUTEN-4997][CH]Fix year diff (#5079)
    
    What changes were proposed in this pull request?
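    `checkAndGetDate32` in SparkFunctionToDate.cpp now takes an extra
    `can_be_parsed` output flag. When the strict yyyy-MM-dd check fails, the
    flag is set to the result of the year check (`checkNumbericASCII(buf, 0, 4)`).
    If that check failed, the caller marks the row NULL instead of falling back
    to `tryParseDateTimeBestEffort`; otherwise the best-effort fallback is used
    as before.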
    
    (Fixes: #4997)
    
    How was this patch tested?
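    Tested by unit test: "GLUTEN-4997: Bug fix year diff" in
    GlutenClickHouseTPCHSaltNullParquetSuite, which compares `year(data)` on
    partial and malformed date strings against vanilla Spark.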
---
 .../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 12 ++++++++++
 .../local-engine/Functions/SparkFunctionToDate.cpp | 26 +++++++++++++++-------
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git 
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
 
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 0a71d2927..957cf99e6 100644
--- 
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -2452,5 +2452,17 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends 
GlutenClickHouseTPCHAbstr
     compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
     spark.sql("drop table test_tbl_4279")
   }
+
+  test("GLUTEN-4997: Bug fix year diff") {
+    val tbl_create_sql = "create table test_tbl_4997(id bigint, data string) 
using parquet"
+    val tbl_insert_sql =
+      "insert into test_tbl_4997 values(1, '2024-01-03'), (2, '2024'), (3, 
'2024-'), (4, '2024-1')," +
+        "(5, '2024-1-'), (6, '2024-1-3'), (7, '2024-1-3T'), (8, '21-0'), (9, 
'12-9')";
+    val select_sql = "select id, year(data) from test_tbl_4997 order by id"
+    spark.sql(tbl_create_sql)
+    spark.sql(tbl_insert_sql)
+    compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
+    spark.sql("drop table test_tbl_4997")
+  }
 }
 // scalastyle:on line.size.limit
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp 
b/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
index 3a25e383d..1c5d68fcd 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
+++ b/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
@@ -53,7 +53,7 @@ public:
     bool isVariadic() const override { return true; }
     bool useDefaultImplementationForConstants() const override { return true; }
 
-    bool checkAndGetDate32(DB::ReadBuffer & buf, DB::DataTypeDate32::FieldType 
&x, const DateLUTImpl & date_lut) const
+    bool checkAndGetDate32(DB::ReadBuffer & buf, DB::DataTypeDate32::FieldType 
&x, const DateLUTImpl & date_lut, UInt8 & can_be_parsed) const
     {
         auto checkNumbericASCII = [&](DB::ReadBuffer & rb, size_t start, 
size_t length) -> bool
         {
@@ -73,12 +73,16 @@ public:
             else
                 return true;
         };
-        if (!checkNumbericASCII(buf, 0, 4) 
-            || !checkDelimiter(buf, 4) 
+        bool yearIsNumberic = checkNumbericASCII(buf, 0, 4);
+        if (!yearIsNumberic
+            || !checkDelimiter(buf, 4)
             || !checkNumbericASCII(buf, 5, 2)
             || !checkDelimiter(buf, 7) 
             || !checkNumbericASCII(buf, 8, 2))
+        {
+            can_be_parsed = yearIsNumberic;
             return false;
+        }
         else
         {
             UInt8 month = (*(buf.position() + 5) - '0') * 10 + 
(*(buf.position() + 6) - '0');
@@ -157,12 +161,18 @@ public:
                     null_container[i] = true;
                     continue;
                 }
-                if (!checkAndGetDate32(buf, result_container[i], 
*local_time_zone))
+                UInt8 can_be_parsed = 1;
+                if (!checkAndGetDate32(buf, result_container[i], 
*local_time_zone, can_be_parsed))
                 {
-                    time_t tmp = 0;
-                    bool parsed = tryParseDateTimeBestEffort(tmp, buf, 
*local_time_zone, *utc_time_zone);
-                    result_container[i] = 
local_time_zone->toDayNum<time_t>(tmp);
-                    null_container[i] = !parsed;
+                    if (!can_be_parsed)
+                        null_container[i] = true;
+                    else
+                    {
+                        time_t tmp = 0;
+                        bool parsed = tryParseDateTimeBestEffort(tmp, buf, 
*local_time_zone, *utc_time_zone);
+                        result_container[i] = 
local_time_zone->toDayNum<time_t>(tmp);
+                        null_container[i] = !parsed;
+                    }
                 }
             }
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to