This is an automated email from the ASF dual-hosted git repository.
zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new fedbc72c91 Revert "[GLUTEN-8080][CH]Support function transform_keys/transform_values (#8…" (#8121)
fedbc72c91 is described below
commit fedbc72c9153f75c83f1717360b48f4e40f4e7fd
Author: 李扬 <[email protected]>
AuthorDate: Mon Dec 2 13:06:22 2024 +0800
Revert "[GLUTEN-8080][CH]Support function transform_keys/transform_values (#8…" (#8121)
This reverts commit d75e90f5c57be3bf345ed861c52d474550001282.
---
.../org/apache/gluten/utils/CHExpressionUtil.scala | 2 +
.../execution/GlutenFunctionValidateSuite.scala | 12 ---
cpp-ch/local-engine/Parser/FunctionParser.cpp | 2 +
.../mapHighOrderFunctions.cpp | 92 ----------------------
.../utils/clickhouse/ClickHouseTestSettings.scala | 4 -
.../utils/clickhouse/ClickHouseTestSettings.scala | 4 -
.../utils/clickhouse/ClickHouseTestSettings.scala | 4 -
.../utils/clickhouse/ClickHouseTestSettings.scala | 4 -
8 files changed, 4 insertions(+), 120 deletions(-)
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index 1dd815b6d7..f6d18d7a22 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -203,6 +203,8 @@ object CHExpressionUtil {
TO_UTC_TIMESTAMP -> UtcTimestampValidator(),
FROM_UTC_TIMESTAMP -> UtcTimestampValidator(),
STACK -> DefaultValidator(),
+ TRANSFORM_KEYS -> DefaultValidator(),
+ TRANSFORM_VALUES -> DefaultValidator(),
RAISE_ERROR -> DefaultValidator(),
WIDTH_BUCKET -> DefaultValidator()
)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
index 39b5421f5d..dbe8852290 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
@@ -860,16 +860,4 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS
val sql = "select cast(id % 2 = 1 as string) from range(10)"
compareResultsAgainstVanillaSpark(sql, true, { _ => })
}
-
- test("Test transform_keys/transform_values") {
- val sql = """
- |select
- | transform_keys(map_from_arrays(array(id+1, id+2, id+3),
- | array(1, id+2, 3)), (k, v) -> k + 1),
- | transform_values(map_from_arrays(array(id+1, id+2, id+3),
- | array(1, id+2, 3)), (k, v) -> v + 1)
- |from range(10)
- |""".stripMargin
- compareResultsAgainstVanillaSpark(sql, true, { _ => })
- }
}
diff --git a/cpp-ch/local-engine/Parser/FunctionParser.cpp b/cpp-ch/local-engine/Parser/FunctionParser.cpp
index 6ea5148ea4..7e794dabec 100644
--- a/cpp-ch/local-engine/Parser/FunctionParser.cpp
+++ b/cpp-ch/local-engine/Parser/FunctionParser.cpp
@@ -181,7 +181,9 @@ FunctionParserPtr FunctionParserFactory::get(const String & name, ParserContextP
{
auto res = tryGet(name, ctx);
if (!res)
+ {
throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown function parser {}", name);
+ }
return res;
}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/mapHighOrderFunctions.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/mapHighOrderFunctions.cpp
deleted file mode 100644
index e559980f85..0000000000
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/mapHighOrderFunctions.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeFunction.h>
-#include <DataTypes/DataTypeNullable.h>
-#include <Parser/FunctionParser.h>
-#include <Parser/TypeParser.h>
-#include <Parser/scalar_function_parser/lambdaFunction.h>
-#include <Common/BlockTypeUtils.h>
-#include <Common/CHUtil.h>
-#include <Common/Exception.h>
-#include <Common/logger_useful.h>
-#include "DataTypes/DataTypeMap.h"
-
-namespace DB::ErrorCodes
-{
- extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
- extern const int BAD_ARGUMENTS;
-}
-
-namespace local_engine
-{
-
-template <bool transform_keys = true>
-class FunctionParserMapTransformImpl : public FunctionParser
-{
-public:
- static constexpr auto name = transform_keys ? "transform_keys" : "transform_values";
- String getName() const override { return name; }
-
- explicit FunctionParserMapTransformImpl(ParserContextPtr parser_context_) : FunctionParser(parser_context_) {}
- ~FunctionParserMapTransformImpl() override = default;
-
- const DB::ActionsDAG::Node *
- parse(const substrait::Expression_ScalarFunction & substrait_func, DB::ActionsDAG & actions_dag) const override
- {
- /// Parse spark transform_keys(map, func) as CH mapFromArrays(arrayMap(func, cast(map as array)), mapValues(map))
- /// Parse spark transform_values(map, func) as CH mapFromArrays(mapKeys(map), arrayMap(func, cast(map as array)))
- auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
- if (parsed_args.size() != 2)
- throw DB::Exception(DB::ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "{} function must have three arguments", getName());
-
- auto lambda_args = collectLambdaArguments(parser_context, substrait_func.arguments()[1].value().scalar_function());
- if (lambda_args.size() != 2)
- throw DB::Exception(
- DB::ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "The lambda function in {} must have two arguments", getName());
-
- const auto * map_node = parsed_args[0];
- const auto * func_node = parsed_args[1];
- const auto & map_type = map_node->result_type;
- auto array_type = checkAndGetDataType<DataTypeMap>(removeNullable(map_type).get())->getNestedType();
- if (map_type->isNullable())
- array_type = std::make_shared<DataTypeNullable>(array_type);
- const auto * array_node = ActionsDAGUtil::convertNodeTypeIfNeeded(actions_dag, map_node, array_type);
- const auto * transformed_node = toFunctionNode(actions_dag, "arrayMap", {func_node, array_node});
-
- const DB::ActionsDAG::Node * result_node = nullptr;
- if constexpr (transform_keys)
- {
- const auto * nontransformed_node = toFunctionNode(actions_dag, "mapValues", {parsed_args[0]});
- result_node = toFunctionNode(actions_dag, "mapFromArrays", {transformed_node, nontransformed_node});
- }
- else
- {
- const auto * nontransformed_node = toFunctionNode(actions_dag, "mapKeys", {parsed_args[0]});
- result_node = toFunctionNode(actions_dag, "mapFromArrays", {nontransformed_node, transformed_node});
- }
- return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag);
- }
-};
-
-using FunctionParserTransformKeys = FunctionParserMapTransformImpl<true>;
-using FunctionParserTransformValues = FunctionParserMapTransformImpl<false>;
-
-static FunctionParserRegister<FunctionParserTransformKeys> register_transform_keys;
-static FunctionParserRegister<FunctionParserTransformValues> register_transform_values;
-}
\ No newline at end of file
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 71e32bdccf..2eb5bd11ff 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -166,10 +166,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("aggregate function - array for non-primitive type")
.exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
.exclude("SPARK-24734: Fix containsNull of Concat for array type")
- .exclude("transform keys function - primitive data types")
- .exclude("transform keys function - Invalid lambda functions and exceptions")
- .exclude("transform values function - test primitive data types")
- .exclude("transform values function - test empty")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite].exclude(
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index ce09d0f595..a7bf5d4da9 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -184,10 +184,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("aggregate function - array for non-primitive type")
.exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
.exclude("SPARK-24734: Fix containsNull of Concat for array type")
- .exclude("transform keys function - primitive data types")
- .exclude("transform keys function - Invalid lambda functions and exceptions")
- .exclude("transform values function - test primitive data types")
- .exclude("transform values function - test empty")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite].exclude(
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 71e2f6375e..b7e3905740 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -186,10 +186,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("aggregate function - array for non-primitive type")
.exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
.exclude("SPARK-24734: Fix containsNull of Concat for array type")
- .exclude("transform keys function - primitive data types")
- .exclude("transform keys function - Invalid lambda functions and exceptions")
- .exclude("transform values function - test primitive data types")
- .exclude("transform values function - test empty")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite].exclude(
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index f08b66972c..8ce145735d 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -186,10 +186,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("aggregate function - array for non-primitive type")
.exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
.exclude("SPARK-24734: Fix containsNull of Concat for array type")
- .exclude("transform keys function - primitive data types")
- .exclude("transform keys function - Invalid lambda functions and exceptions")
- .exclude("transform values function - test primitive data types")
- .exclude("transform values function - test empty")
enableSuite[GlutenDataFrameHintSuite]
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenDataFrameJoinSuite].exclude(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]