This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 0369a6b0db [GLUTEN-7240][CH] Fix all failed uts in 
GlutenComplexTypeSuite (#7242)
0369a6b0db is described below

commit 0369a6b0dbffa27830727a0951b08260ed26c278
Author: 李扬 <[email protected]>
AuthorDate: Fri Oct 11 10:59:52 2024 +0800

    [GLUTEN-7240][CH] Fix all failed uts in GlutenComplexTypeSuite (#7242)
    
    * fix failed uts
    
    * rename files
    
    * fix failed uts
    
    * fix failed uts
    
    * update version
    
    * update version
    
    * update version
---
 .../CommonScalarFunctionParser.cpp                 |  2 +-
 .../Parser/scalar_function_parser/arrayElement.h   | 80 ----------------------
 .../Parser/scalar_function_parser/elementAt.cpp    | 58 +++++++++++++---
 .../Parser/scalar_function_parser/getArrayItem.cpp | 67 +++++++++++++++---
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  5 --
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  5 --
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  5 --
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  5 --
 8 files changed, 108 insertions(+), 119 deletions(-)

diff --git 
a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
 
b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
index 695166a894..8fe6fb8da8 100644
--- 
a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
+++ 
b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
@@ -171,7 +171,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ArraysZip, 
arrays_zip, arrayZipUnaligned)
 
 // map functions
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Map, map, map);
-REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetMapValue, get_map_value, 
arrayElement);
+REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetMapValue, get_map_value, 
arrayElementOrNull);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapKeys, map_keys, mapKeys);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapValues, map_values, mapValues);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapFromArrays, map_from_arrays, 
mapFromArrays);
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/arrayElement.h 
b/cpp-ch/local-engine/Parser/scalar_function_parser/arrayElement.h
deleted file mode 100644
index 964130ebcb..0000000000
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/arrayElement.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <DataTypes/IDataType.h>
-#include <Parser/FunctionParser.h>
-
-#include <Core/Field.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeNullable.h>
-#include <Functions/FunctionHelpers.h>
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-}
-}
-
-namespace local_engine
-{
-
-class FunctionParserArrayElement : public FunctionParser
-{
-
-public:
-    explicit FunctionParserArrayElement(SerializedPlanParser * plan_parser_) : 
FunctionParser(plan_parser_) { }
-    ~FunctionParserArrayElement() override = default;
-
-    const ActionsDAG::Node * parse(
-    const substrait::Expression_ScalarFunction & substrait_func,
-    ActionsDAG & actions_dag) const override
-    {
-        /**
-            parse arrayElement(arr, idx) as
-            if (idx > size(arr))
-                null
-            else 
-                arrayElement(arr, idx)
-        */
-        auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
-        if (parsed_args.size() != 2)
-            throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, 
"Function {} requires exactly two arguments", getName());
-
-        //arrayElement(arrary, idx)
-        auto * array_element_node = toFunctionNode(actions_dag, 
"arrayElement", parsed_args);
-        //idx > length(array)
-        auto * length_node = toFunctionNode(actions_dag, "length", 
{parsed_args[0]});
-        auto * greater_or_equals_node = toFunctionNode(actions_dag, "greater", 
{parsed_args[1], length_node});
-        const DataTypeArray * array_type = 
checkAndGetDataType<DataTypeArray>(removeNullable(parsed_args[0]->result_type).get());
-        if (!array_type)
-            throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "First argument for 
function {} must be an array", getName());
-        const DataTypePtr element_type = array_type->getNestedType();
-        const auto * null_const_node = addColumnToActionsDAG(actions_dag, 
makeNullable(element_type), Field{});
-        //if(idx > length(array), NULL, arrayElement(array, idx))
-        auto * if_node = toFunctionNode(actions_dag, "if", 
{greater_or_equals_node, null_const_node, array_element_node});
-        return if_node;
-    }
-
-    String getCHFunctionName(const substrait::Expression_ScalarFunction & 
/*substrait_func*/) const override
-    {
-        return "arrayElement";
-    }
-};
-
-}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/elementAt.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/elementAt.cpp
index b5587e79dc..021e9245e2 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/elementAt.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/elementAt.cpp
@@ -15,29 +15,71 @@
  * limitations under the License.
  */
 
-#include <Parser/scalar_function_parser/arrayElement.h>
 #include <DataTypes/DataTypeMap.h>
 #include <DataTypes/IDataType.h>
+#include <Parser/FunctionParser.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+extern const int BAD_ARGUMENTS;
+}
+}
 
 namespace local_engine
 {
-class FunctionParserElementAt : public FunctionParserArrayElement
+class FunctionParserElementAt : public FunctionParser
 {
 public:
-    explicit FunctionParserElementAt(SerializedPlanParser * plan_parser_) : 
FunctionParserArrayElement(plan_parser_) { }
+    explicit FunctionParserElementAt(SerializedPlanParser * plan_parser_) : 
FunctionParser(plan_parser_) { }
     ~FunctionParserElementAt() override = default;
+
     static constexpr auto name = "element_at";
     String getName() const override { return name; }
 
     const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction 
& substrait_func, ActionsDAG & actions_dag) const override
     {
+        /*
+            parse element_at(arr, idx) as
+            if (idx == 0)
+                throwIf(idx == 0)
+            else
+                arrayElementOrNull(arr, idx)
+
+            parse element_at(map, key) as arrayElementOrNull(map, key)
+        */
         auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
         if (parsed_args.size() != 2)
-            throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, 
"Function {} requires exactly two arguments", getName());
-        if (isMap(removeNullable(parsed_args[0]->result_type)))
-            return toFunctionNode(actions_dag, "arrayElement", parsed_args);
-        else
-            return FunctionParserArrayElement::parse(substrait_func, 
actions_dag);
+            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, 
"Function {} requires exactly two arguments", getName());
+
+        auto arg_type = removeNullable(parsed_args[0]->result_type);
+        if (isMap(arg_type))
+        {
+            const auto * map_arg = parsed_args[0];
+            const auto * key_arg = parsed_args[1];
+            const auto * result_node = toFunctionNode(actions_dag, 
"arrayElementOrNull", {map_arg, key_arg});
+            return convertNodeTypeIfNeeded(substrait_func, result_node, 
actions_dag);
+        }
+        else if (isArray(arg_type))
+        {
+            const auto * arr_arg = parsed_args[0];
+            const auto * idx_arg = parsed_args[1];
+
+            const auto * zero_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DataTypeInt32>(), 0);
+            const auto * equal_zero_node = toFunctionNode(actions_dag, 
"equals", {idx_arg, zero_node});
+            const auto * throw_message
+                = addColumnToActionsDAG(actions_dag, 
std::make_shared<DataTypeString>(), "SQL array indices start at 1");
+            /// throwIf(idx == 0, 'SQL array indices start at 1')
+            const auto * throw_if_node = toFunctionNode(actions_dag, 
"throwIf", {equal_zero_node, throw_message});
+
+            const auto * array_element_node = toFunctionNode(actions_dag, 
"arrayElementOrNull", {arr_arg, idx_arg});
+            /// if(throwIf(idx == 0, 'SQL array indices start at 1'), 
arrayElementOrNull(arr, idx), arrayElementOrNull(arr, idx))
+            const auto * if_node = toFunctionNode(actions_dag, "if", 
{throw_if_node, array_element_node, array_element_node});
+            return convertNodeTypeIfNeeded(substrait_func, if_node, 
actions_dag);
+        }
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "First argument of function 
{} must be an array or a map", getName());
     }
 };
 
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/getArrayItem.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/getArrayItem.cpp
index bdb0bcd098..7b73b4a10c 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/getArrayItem.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/getArrayItem.cpp
@@ -14,19 +14,66 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <DataTypes/IDataType.h>
+#include <Parser/FunctionParser.h>
 
-#include <Parser/scalar_function_parser/arrayElement.h>
+#include <Core/Field.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <Functions/FunctionHelpers.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+}
 
 namespace local_engine
 {
-    class FunctionParserGetArrayItem : public FunctionParserArrayElement
+
+class FunctionParserGetArrayItem : public FunctionParser
+{
+public:
+    explicit FunctionParserGetArrayItem(SerializedPlanParser * plan_parser_) : 
FunctionParser(plan_parser_) { }
+    ~FunctionParserGetArrayItem() override = default;
+
+    static constexpr auto name = "get_array_item";
+    String getName() const override { return name; }
+
+    const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction 
& substrait_func, ActionsDAG & actions_dag) const override
     {
-    public:
-        explicit FunctionParserGetArrayItem(SerializedPlanParser * 
plan_parser_) : FunctionParserArrayElement(plan_parser_) { }
-        ~FunctionParserGetArrayItem() override = default;
-        static constexpr auto name = "get_array_item";
-        String getName() const override { return name; }
-    };
-
-    static FunctionParserRegister<FunctionParserGetArrayItem> 
register_get_array_item;
+        /**
+            parse get_array_item(arr, idx) as
+            if (idx <= 0)
+                null
+            else
+                arrayElementOrNull(arr, idx)
+        */
+        auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
+        if (parsed_args.size() != 2)
+            throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, 
"Function {} requires exactly two arguments", getName());
+
+        const auto * arr_arg = parsed_args[0];
+        const auto * idx_arg = parsed_args[1];
+        const DataTypeArray * arr_type = 
checkAndGetDataType<DataTypeArray>(removeNullable(arr_arg->result_type).get());
+        if (!arr_type)
+            throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "First argument of 
function {} must be an array", getName());
+
+        /// arrayElementOrNull(arrary, idx)
+        const auto * array_element_node = toFunctionNode(actions_dag, 
"arrayElementOrNull", {arr_arg, idx_arg});
+
+        /// idx <= 0
+        const auto * zero_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DataTypeInt32>(), 0);
+        const auto * idx_le_zero_node = toFunctionNode(actions_dag, 
"lessOrEquals", {idx_arg, zero_node});
+
+        const auto * null_node = addColumnToActionsDAG(actions_dag, 
makeNullable(arr_type->getNestedType()), Field{});
+        const auto * if_node = toFunctionNode(actions_dag, "if", 
{idx_le_zero_node, null_node, array_element_node});
+        return convertNodeTypeIfNeeded(substrait_func, if_node, actions_dag);
+    }
+};
+
+static FunctionParserRegister<FunctionParserGetArrayItem> 
register_get_array_item;
+
 }
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 64fcaeea45..0091c0c4dd 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -664,11 +664,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
       "SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then 
non-NaN value")
     .excludeGlutenTest("Shuffle")
   enableSuite[GlutenComplexTypeSuite]
-    .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
-    .exclude("SPARK-33460: GetMapValue NoSuchElementException")
-    .exclude("GetArrayStructFields")
-    .exclude("CreateMap")
-    .exclude("MapFromArrays")
   enableSuite[GlutenConditionalExpressionSuite]
     .exclude("case when")
     .exclude("if/case when - null flags of non-primitive types")
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index c260d3f802..77be9e78c0 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -692,11 +692,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
       "SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then 
non-NaN value")
     .excludeGlutenTest("Shuffle")
   enableSuite[GlutenComplexTypeSuite]
-    .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
-    .exclude("SPARK-33460: GetMapValue NoSuchElementException")
-    .exclude("GetArrayStructFields")
-    .exclude("CreateMap")
-    .exclude("MapFromArrays")
   enableSuite[GlutenConditionalExpressionSuite]
     .exclude("case when")
     .exclude("if/case when - null flags of non-primitive types")
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 76ca12b0ac..d73e44fdc0 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -581,11 +581,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
       "SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then 
non-NaN value")
     .excludeGlutenTest("Shuffle")
   enableSuite[GlutenComplexTypeSuite]
-    .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
-    .exclude("SPARK-33460: GetMapValue NoSuchElementException")
-    .exclude("GetArrayStructFields")
-    .exclude("CreateMap")
-    .exclude("MapFromArrays")
   enableSuite[GlutenConditionalExpressionSuite]
     .exclude("case when")
     .exclude("if/case when - null flags of non-primitive types")
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index a3553935ae..2bd1bacb4f 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -581,11 +581,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
       "SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then 
non-NaN value")
     .excludeGlutenTest("Shuffle")
   enableSuite[GlutenComplexTypeSuite]
-    .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
-    .exclude("SPARK-33460: GetMapValue NoSuchElementException")
-    .exclude("GetArrayStructFields")
-    .exclude("CreateMap")
-    .exclude("MapFromArrays")
   enableSuite[GlutenConditionalExpressionSuite]
     .exclude("case when")
     .exclude("if/case when - null flags of non-primitive types")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to