This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 0f19a8632 [GLUTEN-4898][CH]Bug fix to date diff (#4900)
0f19a8632 is described below

commit 0f19a86320e8d6eea670951113e997dfd4281a64
Author: KevinyhZou <[email protected]>
AuthorDate: Mon Mar 11 18:35:00 2024 +0800

    [GLUTEN-4898][CH]Bug fix to date diff (#4900)
    
    What changes were proposed in this pull request?
    (Please fill in changes proposed in this fix)
    
    (Fixes: #4898)
    
    Fix the result diff of the to_date function;
    Fix the exception thrown by parseDateTimeBestEffort in SparkFunctionToDateTime;
    Simplify the substring function code.
    How was this patch tested?
    Tested by unit tests (UT).
---
 .../GlutenClickHouseTPCHParquetSuite.scala         |  5 +++-
 .../local-engine/Functions/SparkFunctionToDate.cpp | 30 +++++++++++-----------
 .../Functions/SparkFunctionToDateTime.h            |  4 +--
 .../Parser/scalar_function_parser/substring.cpp    |  5 ++--
 4 files changed, 23 insertions(+), 21 deletions(-)

diff --git 
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
 
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
index 1c44c2de0..9ffde6b5c 100644
--- 
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala
@@ -2103,7 +2103,10 @@ class GlutenClickHouseTPCHParquetSuite extends 
GlutenClickHouseTPCHAbstractSuite
         |(4, '2023-09-02 00:00:01.333-11'),
         |(5, '  2023-09-02 agdfegfew'),
         |(6, 'afe2023-09-02 11:22:33'),
-        |(7, '1970-01-01 00:00:00')
+        |(7, '1970-01-01 00:00:00'),
+        |(8, '2024-3-2'),
+        |(9, '2024-03-2'),
+        |(10, '2024-03')
         |""".stripMargin
     spark.sql(create_table_sql)
     spark.sql(insert_data_sql)
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp 
b/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
index cef3d1452..0b963e769 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
+++ b/cpp-ch/local-engine/Functions/SparkFunctionToDate.cpp
@@ -22,6 +22,7 @@
 #include <Functions/FunctionFactory.h>
 #include <IO/ReadBufferFromMemory.h>
 #include <IO/ReadHelpers.h>
+#include <IO/parseDateTimeBestEffort.h>
 
 namespace DB
 {
@@ -50,7 +51,9 @@ public:
             for (size_t i = start; i < start + length; ++i)
             {
                 if (!isNumericASCII(*(rb.position() + i)))
+                {
                     return false;
+                }
             }
             return true;
         };
@@ -63,7 +66,7 @@ public:
         };
         if (!checkNumbericASCII(buf, 0, 4) 
             || !checkDelimiter(buf, 4) 
-            || !checkNumbericASCII(buf, 5, 2) 
+            || !checkNumbericASCII(buf, 5, 2)
             || !checkDelimiter(buf, 7) 
             || !checkNumbericASCII(buf, 8, 2))
             return false;
@@ -112,19 +115,19 @@ public:
             throw DB::Exception(DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, 
"Function {}'s return type must be date32.", name);
         
         using ColVecTo = DB::DataTypeDate32::ColumnType;
-        typename ColVecTo::MutablePtr result_column = ColVecTo::create(size);
+        typename ColVecTo::MutablePtr result_column = ColVecTo::create(size, 
0);
         typename ColVecTo::Container & result_container = 
result_column->getData();
-        DB::ColumnUInt8::MutablePtr null_map = DB::ColumnUInt8::create(size);
+        DB::ColumnUInt8::MutablePtr null_map = DB::ColumnUInt8::create(size, 
0);
         typename DB::ColumnUInt8::Container & null_container = 
null_map->getData();
-        const DateLUTImpl * time_zone = &DateLUT::instance();
+        const DateLUTImpl * local_time_zone = &DateLUT::instance();
+        const DateLUTImpl * utc_time_zone = &DateLUT::instance("UTC");
 
         for (size_t i = 0; i < size; ++i)
         {
             auto str = src_col->getDataAt(i);
-            if (str.size < 10)
+            if (str.size < 4)
             {
                 null_container[i] = true;
-                result_container[i] = 0;
                 continue;
             }
             else
@@ -134,20 +137,17 @@ public:
                 {
                     buf.position() ++;
                 }
-                if(buf.buffer().end() - buf.position() < 10)
+                if(buf.buffer().end() - buf.position() < 4)
                 {
                     null_container[i] = true;
-                    result_container[i] = 0;
                     continue;
                 }
-                if (!checkAndGetDate32(buf, result_container[i], *time_zone))
-                {
-                    null_container[i] = true;
-                    result_container[i] = 0;
-                }
-                else
+                if (!checkAndGetDate32(buf, result_container[i], 
*local_time_zone))
                 {
-                    null_container[i] = false;
+                    time_t tmp = 0;
+                    bool parsed = tryParseDateTimeBestEffort(tmp, buf, 
*local_time_zone, *utc_time_zone);
+                    result_container[i] = 
local_time_zone->toDayNum<time_t>(tmp);
+                    null_container[i] = !parsed;
                 }
             }
         }
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionToDateTime.h 
b/cpp-ch/local-engine/Functions/SparkFunctionToDateTime.h
index 760a4da75..d185b850f 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionToDateTime.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionToDateTime.h
@@ -163,8 +163,8 @@ public:
             }
             else
             {
-                parseDateTime64BestEffort(dst_data[i], scale, buf, 
*local_time_zone, *utc_time_zone);
-                null_map_data[i] = 0;
+                bool parsed = tryParseDateTime64BestEffort(dst_data[i], scale, 
buf, *local_time_zone, *utc_time_zone);
+                null_map_data[i] = !parsed;
             }
         }
     }
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
index f4c21f4ad..2a44c3e38 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
@@ -51,15 +51,14 @@ public:
          /**
             parse substring(str, start_index, length) as
             if (start_index == 0)
-                substring(str, start_index+1, length)
+                substring(str, 1, length)
             else
                 substring(str, start_index, length)
         */
         auto * const_zero_node = addColumnToActionsDAG(actions_dag, 
start_index_data_type, Field(0));
         auto * const_one_node = addColumnToActionsDAG(actions_dag, 
start_index_data_type, Field(1));
         auto * equals_zero_node = toFunctionNode(actions_dag, "equals", 
{parsed_args[1], const_zero_node});
-        auto * index_plus_node = toFunctionNode(actions_dag, "plus", 
{parsed_args[1], const_one_node});
-        auto * if_node = toFunctionNode(actions_dag, "if", {equals_zero_node, 
index_plus_node, parsed_args[1]});
+        auto * if_node = toFunctionNode(actions_dag, "if", {equals_zero_node, 
const_one_node, parsed_args[1]});
         const DB::ActionsDAG::Node * substring_func_node;
         if (parsed_args.size() == 2)
             substring_func_node = toFunctionNode(actions_dag, "substringUTF8", 
{parsed_args[0], if_node});


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to