This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 0369a6b0db [GLUTEN-7240][CH] Fix all failed uts in
GlutenComplexTypeSuite (#7242)
0369a6b0db is described below
commit 0369a6b0dbffa27830727a0951b08260ed26c278
Author: 李扬 <[email protected]>
AuthorDate: Fri Oct 11 10:59:52 2024 +0800
[GLUTEN-7240][CH] Fix all failed uts in GlutenComplexTypeSuite (#7242)
* fix failed uts
* rename files
* fix failed uts
* fix failed uts
* update version
* update version
* update version
---
.../CommonScalarFunctionParser.cpp | 2 +-
.../Parser/scalar_function_parser/arrayElement.h | 80 ----------------------
.../Parser/scalar_function_parser/elementAt.cpp | 58 +++++++++++++---
.../Parser/scalar_function_parser/getArrayItem.cpp | 67 +++++++++++++++---
.../utils/clickhouse/ClickHouseTestSettings.scala | 5 --
.../utils/clickhouse/ClickHouseTestSettings.scala | 5 --
.../utils/clickhouse/ClickHouseTestSettings.scala | 5 --
.../utils/clickhouse/ClickHouseTestSettings.scala | 5 --
8 files changed, 108 insertions(+), 119 deletions(-)
diff --git
a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
index 695166a894..8fe6fb8da8 100644
---
a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
+++
b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
@@ -171,7 +171,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ArraysZip,
arrays_zip, arrayZipUnaligned)
// map functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Map, map, map);
-REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetMapValue, get_map_value,
arrayElement);
+REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetMapValue, get_map_value,
arrayElementOrNull);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapKeys, map_keys, mapKeys);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapValues, map_values, mapValues);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapFromArrays, map_from_arrays,
mapFromArrays);
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/arrayElement.h
b/cpp-ch/local-engine/Parser/scalar_function_parser/arrayElement.h
deleted file mode 100644
index 964130ebcb..0000000000
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/arrayElement.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <DataTypes/IDataType.h>
-#include <Parser/FunctionParser.h>
-
-#include <Core/Field.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeNullable.h>
-#include <Functions/FunctionHelpers.h>
-
-namespace DB
-{
-namespace ErrorCodes
-{
- extern const int BAD_ARGUMENTS;
- extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-}
-}
-
-namespace local_engine
-{
-
-class FunctionParserArrayElement : public FunctionParser
-{
-
-public:
- explicit FunctionParserArrayElement(SerializedPlanParser * plan_parser_) :
FunctionParser(plan_parser_) { }
- ~FunctionParserArrayElement() override = default;
-
- const ActionsDAG::Node * parse(
- const substrait::Expression_ScalarFunction & substrait_func,
- ActionsDAG & actions_dag) const override
- {
- /**
- parse arrayElement(arr, idx) as
- if (idx > size(arr))
- null
- else
- arrayElement(arr, idx)
- */
- auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
- if (parsed_args.size() != 2)
- throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} requires exactly two arguments", getName());
-
- //arrayElement(arrary, idx)
- auto * array_element_node = toFunctionNode(actions_dag,
"arrayElement", parsed_args);
- //idx > length(array)
- auto * length_node = toFunctionNode(actions_dag, "length",
{parsed_args[0]});
- auto * greater_or_equals_node = toFunctionNode(actions_dag, "greater",
{parsed_args[1], length_node});
- const DataTypeArray * array_type =
checkAndGetDataType<DataTypeArray>(removeNullable(parsed_args[0]->result_type).get());
- if (!array_type)
- throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "First argument for
function {} must be an array", getName());
- const DataTypePtr element_type = array_type->getNestedType();
- const auto * null_const_node = addColumnToActionsDAG(actions_dag,
makeNullable(element_type), Field{});
- //if(idx > length(array), NULL, arrayElement(array, idx))
- auto * if_node = toFunctionNode(actions_dag, "if",
{greater_or_equals_node, null_const_node, array_element_node});
- return if_node;
- }
-
- String getCHFunctionName(const substrait::Expression_ScalarFunction &
/*substrait_func*/) const override
- {
- return "arrayElement";
- }
-};
-
-}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/elementAt.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/elementAt.cpp
index b5587e79dc..021e9245e2 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/elementAt.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/elementAt.cpp
@@ -15,29 +15,71 @@
* limitations under the License.
*/
-#include <Parser/scalar_function_parser/arrayElement.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/IDataType.h>
+#include <Parser/FunctionParser.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+extern const int BAD_ARGUMENTS;
+}
+}
namespace local_engine
{
-class FunctionParserElementAt : public FunctionParserArrayElement
+class FunctionParserElementAt : public FunctionParser
{
public:
- explicit FunctionParserElementAt(SerializedPlanParser * plan_parser_) :
FunctionParserArrayElement(plan_parser_) { }
+ explicit FunctionParserElementAt(SerializedPlanParser * plan_parser_) :
FunctionParser(plan_parser_) { }
~FunctionParserElementAt() override = default;
+
static constexpr auto name = "element_at";
String getName() const override { return name; }
const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction
& substrait_func, ActionsDAG & actions_dag) const override
{
+ /*
+ parse element_at(arr, idx) as
+ if (idx == 0)
+ throwIf(idx == 0)
+ else
+ arrayElementOrNull(arr, idx)
+
+ parse element_at(map, key) as arrayElementOrNull(map, key)
+ */
auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
if (parsed_args.size() != 2)
- throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} requires exactly two arguments", getName());
- if (isMap(removeNullable(parsed_args[0]->result_type)))
- return toFunctionNode(actions_dag, "arrayElement", parsed_args);
- else
- return FunctionParserArrayElement::parse(substrait_func,
actions_dag);
+ throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} requires exactly two arguments", getName());
+
+ auto arg_type = removeNullable(parsed_args[0]->result_type);
+ if (isMap(arg_type))
+ {
+ const auto * map_arg = parsed_args[0];
+ const auto * key_arg = parsed_args[1];
+ const auto * result_node = toFunctionNode(actions_dag,
"arrayElementOrNull", {map_arg, key_arg});
+ return convertNodeTypeIfNeeded(substrait_func, result_node,
actions_dag);
+ }
+ else if (isArray(arg_type))
+ {
+ const auto * arr_arg = parsed_args[0];
+ const auto * idx_arg = parsed_args[1];
+
+ const auto * zero_node = addColumnToActionsDAG(actions_dag,
std::make_shared<DataTypeInt32>(), 0);
+ const auto * equal_zero_node = toFunctionNode(actions_dag,
"equals", {idx_arg, zero_node});
+ const auto * throw_message
+ = addColumnToActionsDAG(actions_dag,
std::make_shared<DataTypeString>(), "SQL array indices start at 1");
+ /// throwIf(idx == 0, 'SQL array indices start at 1')
+ const auto * throw_if_node = toFunctionNode(actions_dag,
"throwIf", {equal_zero_node, throw_message});
+
+ const auto * array_element_node = toFunctionNode(actions_dag,
"arrayElementOrNull", {arr_arg, idx_arg});
+ /// if(throwIf(idx == 0, 'SQL array indices start at 1'),
arrayElementOrNull(arr, idx), arrayElementOrNull(arr, idx))
+ const auto * if_node = toFunctionNode(actions_dag, "if",
{throw_if_node, array_element_node, array_element_node});
+ return convertNodeTypeIfNeeded(substrait_func, if_node,
actions_dag);
+ }
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "First argument of function
{} must be an array or a map", getName());
}
};
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/getArrayItem.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/getArrayItem.cpp
index bdb0bcd098..7b73b4a10c 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/getArrayItem.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/getArrayItem.cpp
@@ -14,19 +14,66 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+#include <DataTypes/IDataType.h>
+#include <Parser/FunctionParser.h>
-#include <Parser/scalar_function_parser/arrayElement.h>
+#include <Core/Field.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <Functions/FunctionHelpers.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+}
namespace local_engine
{
- class FunctionParserGetArrayItem : public FunctionParserArrayElement
+
+class FunctionParserGetArrayItem : public FunctionParser
+{
+public:
+ explicit FunctionParserGetArrayItem(SerializedPlanParser * plan_parser_) :
FunctionParser(plan_parser_) { }
+ ~FunctionParserGetArrayItem() override = default;
+
+ static constexpr auto name = "get_array_item";
+ String getName() const override { return name; }
+
+ const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction
& substrait_func, ActionsDAG & actions_dag) const override
{
- public:
- explicit FunctionParserGetArrayItem(SerializedPlanParser *
plan_parser_) : FunctionParserArrayElement(plan_parser_) { }
- ~FunctionParserGetArrayItem() override = default;
- static constexpr auto name = "get_array_item";
- String getName() const override { return name; }
- };
-
- static FunctionParserRegister<FunctionParserGetArrayItem>
register_get_array_item;
+ /**
+ parse get_array_item(arr, idx) as
+ if (idx <= 0)
+ null
+ else
+ arrayElementOrNull(arr, idx)
+ */
+ auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
+ if (parsed_args.size() != 2)
+ throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} requires exactly two arguments", getName());
+
+ const auto * arr_arg = parsed_args[0];
+ const auto * idx_arg = parsed_args[1];
+ const DataTypeArray * arr_type =
checkAndGetDataType<DataTypeArray>(removeNullable(arr_arg->result_type).get());
+ if (!arr_type)
+ throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "First argument of
function {} must be an array", getName());
+
+ /// arrayElementOrNull(arrary, idx)
+ const auto * array_element_node = toFunctionNode(actions_dag,
"arrayElementOrNull", {arr_arg, idx_arg});
+
+ /// idx <= 0
+ const auto * zero_node = addColumnToActionsDAG(actions_dag,
std::make_shared<DataTypeInt32>(), 0);
+ const auto * idx_le_zero_node = toFunctionNode(actions_dag,
"lessOrEquals", {idx_arg, zero_node});
+
+ const auto * null_node = addColumnToActionsDAG(actions_dag,
makeNullable(arr_type->getNestedType()), Field{});
+ const auto * if_node = toFunctionNode(actions_dag, "if",
{idx_le_zero_node, null_node, array_element_node});
+ return convertNodeTypeIfNeeded(substrait_func, if_node, actions_dag);
+ }
+};
+
+static FunctionParserRegister<FunctionParserGetArrayItem>
register_get_array_item;
+
}
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 64fcaeea45..0091c0c4dd 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -664,11 +664,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
"SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then
non-NaN value")
.excludeGlutenTest("Shuffle")
enableSuite[GlutenComplexTypeSuite]
- .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
- .exclude("SPARK-33460: GetMapValue NoSuchElementException")
- .exclude("GetArrayStructFields")
- .exclude("CreateMap")
- .exclude("MapFromArrays")
enableSuite[GlutenConditionalExpressionSuite]
.exclude("case when")
.exclude("if/case when - null flags of non-primitive types")
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index c260d3f802..77be9e78c0 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -692,11 +692,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
"SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then
non-NaN value")
.excludeGlutenTest("Shuffle")
enableSuite[GlutenComplexTypeSuite]
- .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
- .exclude("SPARK-33460: GetMapValue NoSuchElementException")
- .exclude("GetArrayStructFields")
- .exclude("CreateMap")
- .exclude("MapFromArrays")
enableSuite[GlutenConditionalExpressionSuite]
.exclude("case when")
.exclude("if/case when - null flags of non-primitive types")
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 76ca12b0ac..d73e44fdc0 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -581,11 +581,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
"SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then
non-NaN value")
.excludeGlutenTest("Shuffle")
enableSuite[GlutenComplexTypeSuite]
- .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
- .exclude("SPARK-33460: GetMapValue NoSuchElementException")
- .exclude("GetArrayStructFields")
- .exclude("CreateMap")
- .exclude("MapFromArrays")
enableSuite[GlutenConditionalExpressionSuite]
.exclude("case when")
.exclude("if/case when - null flags of non-primitive types")
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index a3553935ae..2bd1bacb4f 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -581,11 +581,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
"SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then
non-NaN value")
.excludeGlutenTest("Shuffle")
enableSuite[GlutenComplexTypeSuite]
- .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
- .exclude("SPARK-33460: GetMapValue NoSuchElementException")
- .exclude("GetArrayStructFields")
- .exclude("CreateMap")
- .exclude("MapFromArrays")
enableSuite[GlutenConditionalExpressionSuite]
.exclude("case when")
.exclude("if/case when - null flags of non-primitive types")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]