This is an automated email from the ASF dual-hosted git repository.

taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 053c473cb2 fix slice unexpected exception (#8759)
053c473cb2 is described below

commit 053c473cb2890c71e697dda37b126d1fe6feb879
Author: 李扬 <[email protected]>
AuthorDate: Tue Feb 18 17:05:31 2025 +0800

    fix slice unexpected exception (#8759)
---
 .../execution/GlutenFunctionValidateSuite.scala    | 15 ++++++++++
 .../Parser/scalar_function_parser/slice.cpp        | 35 ++++++++++++----------
 2 files changed, 35 insertions(+), 15 deletions(-)

diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
index 75f9e9beb1..a2de084ad3 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
@@ -1029,4 +1029,19 @@ class GlutenFunctionValidateSuite extends 
GlutenClickHouseWholeStageTransformerS
       "approx_count_distinct(id, 0.1) from range(1000)"
     compareResultsAgainstVanillaSpark(sql, true, { _ => })
   }
+
+  test("GLUTEN-8723 fix slice unexpected exception") {
+    val create_sql = "create table t_8723 (full_user_agent string) using orc"
+    val insert_sql = "insert into t_8723 values(NULL)"
+    val select1_sql = "select " +
+      "slice(split(full_user_agent, ';'), 2, size(split(full_user_agent, 
';'))) from t_8723"
+    val select2_sql = "select slice(split(full_user_agent, ';'), 0, 2) from 
t_8723"
+    val drop_sql = "drop table t_8723"
+
+    spark.sql(create_sql)
+    spark.sql(insert_sql)
+    compareResultsAgainstVanillaSpark(select1_sql, true, { _ => })
+    compareResultsAgainstVanillaSpark(select2_sql, true, { _ => })
+    spark.sql(drop_sql)
+  }
 }
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/slice.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/slice.cpp
index 33efc02a91..c6a20cd43a 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/slice.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/slice.cpp
@@ -54,7 +54,7 @@ public:
             elif (if(isNull(length)))
                 null
             else
-                slice(assumeNotNull(arr), if (start=0) then throwIf(start=0) 
else start, if (length<0) then throwIf(length<0) else length)
+                slice(assumeNotNull(arr), if(isNotNull(arr) and start=0) then 
throwIf(isNotNull(arr) and start=0) else start, if (isNotNull(arr) and 
length<0) then throwIf(length<0) else length)
 
             Main differences between CH arraySlice and Spark slice
             1. Spark slice throws exception if start = 0 or length < 0
@@ -73,9 +73,10 @@ public:
         auto is_start_nullable = start_arg->result_type->isNullable();
         auto is_length_nullable = length_arg->result_type->isNullable();
 
+        const auto * arr_not_null_node = toFunctionNode(actions_dag, 
"isNotNull", {arr_arg});
         const auto * zero_const_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DB::DataTypeInt32>(), 0);
-        const auto * start_if_node = makeStartIfNode(actions_dag, start_arg, 
zero_const_node);
-        const auto * length_if_node = makeLengthIfNode(actions_dag, 
length_arg, zero_const_node);
+        const auto * start_if_node = makeStartIfNode(actions_dag, start_arg, 
zero_const_node, arr_not_null_node);
+        const auto * length_if_node = makeLengthIfNode(actions_dag, 
length_arg, zero_const_node, arr_not_null_node);
 
         if (!is_arr_nullable && !is_start_nullable && !is_length_nullable)
         {
@@ -84,8 +85,8 @@ public:
         }
 
         // There is at least one nullable argument, should return nullable 
result
-        const auto * arr_not_null_node = is_arr_nullable ? 
toFunctionNode(actions_dag, "assumeNotNull", {arr_arg}) : arr_arg;
-        const auto * slice_node = toFunctionNode(actions_dag, "arraySlice", 
{arr_not_null_node, start_if_node, length_if_node});
+        const auto * arr_denull_node = is_arr_nullable ? 
toFunctionNode(actions_dag, "assumeNotNull", {arr_arg}) : arr_arg;
+        const auto * slice_node = toFunctionNode(actions_dag, "arraySlice", 
{arr_denull_node, start_if_node, length_if_node});
         DB::DataTypePtr wrap_arr_nullable_type = wrapNullableType(true, 
slice_node->result_type);
 
         const auto * wrap_slice_node = ActionsDAGUtil::convertNodeType(
@@ -102,28 +103,32 @@ public:
     }
 
 private:
-    // if (start=0) then throwIf(start=0) else start
+    // if(isNotNull(arr) and start=0) then throwIf(isNotNull(arr) and start=0) 
else start
     const DB::ActionsDAG::Node * makeStartIfNode(
         DB::ActionsDAG & actions_dag,
         const DB::ActionsDAG::Node * start_arg,
-        const DB::ActionsDAG::Node * zero_const_node) const
+        const DB::ActionsDAG::Node * zero_const_node,
+        const DB::ActionsDAG::Node * arr_not_null_node) const
     {
         const auto * start_equal_zero_node = toFunctionNode(actions_dag, 
"equals", {start_arg, zero_const_node});
-        const auto * start_msg_const_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DB::DataTypeString>(), "Unexpected value for start");
-        const auto * start_throw_if_node = toFunctionNode(actions_dag, 
"throwIf", {start_equal_zero_node, start_msg_const_node});
-        return toFunctionNode(actions_dag, "if", {start_equal_zero_node, 
start_throw_if_node, start_arg});
+        const auto * condition_node = toFunctionNode(actions_dag, "and", 
{arr_not_null_node, start_equal_zero_node});
+        const auto * msg_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DB::DataTypeString>(), "Unexpected value for start");
+        const auto * throw_if_node = toFunctionNode(actions_dag, "throwIf", 
{condition_node, msg_node});
+        return toFunctionNode(actions_dag, "if", {condition_node, 
throw_if_node, start_arg});
     }
 
-     // if (length<0) then throwIf(length<0) else length
+    // if (isNotNull(arr) and length<0) then throwIf(length<0) else length)
     const DB::ActionsDAG::Node * makeLengthIfNode(
         DB::ActionsDAG & actions_dag,
         const DB::ActionsDAG::Node * length_arg,
-        const DB::ActionsDAG::Node * zero_const_node) const
+        const DB::ActionsDAG::Node * zero_const_node,
+        const DB::ActionsDAG::Node * arr_not_null_node) const
     {
         const auto * length_less_zero_node = toFunctionNode(actions_dag, 
"less", {length_arg, zero_const_node});
-        const auto * length_msg_const_node = 
addColumnToActionsDAG(actions_dag, std::make_shared<DB::DataTypeString>(), 
"Unexpected value for length");
-        const auto * length_throw_if_node = toFunctionNode(actions_dag, 
"throwIf", {length_less_zero_node, length_msg_const_node});
-        return toFunctionNode(actions_dag, "if", {length_less_zero_node, 
length_throw_if_node, length_arg});
+        const auto * condition_node = toFunctionNode(actions_dag, "and", 
{arr_not_null_node, length_less_zero_node});
+        const auto * msg_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DB::DataTypeString>(), "Unexpected value for length");
+        const auto * throw_if_node = toFunctionNode(actions_dag, "throwIf", 
{condition_node, msg_node});
+        return toFunctionNode(actions_dag, "if", {condition_node, 
throw_if_node, length_arg});
     }
 };
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to