This is an automated email from the ASF dual-hosted git repository.
zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 73eb21db4 [GLUTEN-5904][CH] Convert nan to null which comes from
stddev (#5913)
73eb21db4 is described below
commit 73eb21db45a83c3180518bfcef3c5d343595f452
Author: lgbo <[email protected]>
AuthorDate: Thu May 30 23:35:59 2024 +0800
[GLUTEN-5904][CH] Convert nan to null which comes from stddev (#5913)
[CH] Convert nan to null which comes from stddev
---
.../test/resources/tpcds-queries-output/q17.out | 4 +-
.../execution/GlutenClickHouseTPCHSuite.scala | 15 +++++
.../CommonAggregateFunctionParser.cpp | 2 -
.../SimpleStatisticsFunctions.cpp | 70 ++++++++++++++++++++++
4 files changed, 87 insertions(+), 4 deletions(-)
diff --git
a/backends-clickhouse/src/test/resources/tpcds-queries-output/q17.out
b/backends-clickhouse/src/test/resources/tpcds-queries-output/q17.out
index ab9c217f7..95891dd39 100644
--- a/backends-clickhouse/src/test/resources/tpcds-queries-output/q17.out
+++ b/backends-clickhouse/src/test/resources/tpcds-queries-output/q17.out
@@ -1,2 +1,2 @@
-AAAAAAAADOCAAAAA|-|Little, national services will buy young molecules. In part
video-taped activities join
now|-|TN|-|1|-|24.0|-|NaN|-|NaN|-|1|-|11.0|-|NaN|-|NaN|-|1|-|49.0|-|NaN|-|NaN
-AAAAAAAAEBOBAAAA|-|Special words should tell by a
follower|-|TN|-|1|-|66.0|-|NaN|-|NaN|-|1|-|38.0|-|NaN|-|NaN|-|1|-|56.0|-|NaN|-|NaN
+AAAAAAAADOCAAAAA|-|Little, national services will buy young molecules. In part
video-taped activities join
now|-|TN|-|1|-|24.0|-|null|-|null|-|1|-|11.0|-|null|-|null|-|1|-|49.0|-|null|-|null
+AAAAAAAAEBOBAAAA|-|Special words should tell by a
follower|-|TN|-|1|-|66.0|-|null|-|null|-|1|-|38.0|-|null|-|null|-|1|-|56.0|-|null|-|null
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala
index 6dcb83de4..96b2cb09b 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala
@@ -481,5 +481,20 @@ class GlutenClickHouseTPCHSuite extends
GlutenClickHouseTPCHAbstractSuite {
spark.sql(table_drop_sql)
}
+
+ test("GLUTEN-5904 NaN values from stddev") {
+ val sql1 =
+ """
+ |select a, stddev(b/c) from (select * from values (1,2, 1), (1,3,0) as
data(a,b,c))
+ |group by a
+ |""".stripMargin
+ compareResultsAgainstVanillaSpark(sql1, true, { _ => })
+ val sql2 =
+ """
+ |select a, stddev(b) from (select * from values (1,2, 1) as
data(a,b,c)) group by a
+ |""".stripMargin
+ compareResultsAgainstVanillaSpark(sql2, true, { _ => })
+
+ }
}
// scalastyle:off line.size.limit
diff --git
a/cpp-ch/local-engine/Parser/aggregate_function_parser/CommonAggregateFunctionParser.cpp
b/cpp-ch/local-engine/Parser/aggregate_function_parser/CommonAggregateFunctionParser.cpp
index afe65f793..1619c7410 100644
---
a/cpp-ch/local-engine/Parser/aggregate_function_parser/CommonAggregateFunctionParser.cpp
+++
b/cpp-ch/local-engine/Parser/aggregate_function_parser/CommonAggregateFunctionParser.cpp
@@ -25,8 +25,6 @@ REGISTER_COMMON_AGGREGATE_FUNCTION_PARSER(Sum, sum, sum)
REGISTER_COMMON_AGGREGATE_FUNCTION_PARSER(Avg, avg, avg)
REGISTER_COMMON_AGGREGATE_FUNCTION_PARSER(Min, min, min)
REGISTER_COMMON_AGGREGATE_FUNCTION_PARSER(Max, max, max)
-REGISTER_COMMON_AGGREGATE_FUNCTION_PARSER(StdDev, stddev, stddev_samp)
-REGISTER_COMMON_AGGREGATE_FUNCTION_PARSER(StdDevSamp, stddev_samp, stddev_samp)
REGISTER_COMMON_AGGREGATE_FUNCTION_PARSER(StdDevPop, stddev_pop, stddev_pop)
REGISTER_COMMON_AGGREGATE_FUNCTION_PARSER(BitAnd, bit_and, groupBitAnd)
REGISTER_COMMON_AGGREGATE_FUNCTION_PARSER(BitOr, bit_or, groupBitOr)
diff --git
a/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp
b/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp
new file mode 100644
index 000000000..7e75e20bb
--- /dev/null
+++
b/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <Parser/AggregateFunctionParser.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <Poco/Logger.h>
+#include <Common/logger_useful.h>
+
+namespace local_engine
+{
+/// For stddev
+struct StddevNameStruct
+{
+ static constexpr auto spark_name = "stddev";
+ static constexpr auto ch_name = "stddev";
+};
+
+struct StddevSampNameStruct
+{
+ static constexpr auto spark_name = "stddev_samp";
+ static constexpr auto ch_name = "stddev_samp";
+};
+template <typename NameStruct>
+class AggregateFunctionParserStddev final : public AggregateFunctionParser
+{
+public:
+ AggregateFunctionParserStddev(SerializedPlanParser * plan_parser_) :
AggregateFunctionParser(plan_parser_) { }
+ ~AggregateFunctionParserStddev() override = default;
+ String getName() const override { return NameStruct::spark_name; }
+ static constexpr auto name = NameStruct::spark_name;
+ String getCHFunctionName(const CommonFunctionInfo &) const override {
return NameStruct::ch_name; }
+ String getCHFunctionName(DB::DataTypes &) const override { return
NameStruct::ch_name; }
+ const DB::ActionsDAG::Node * convertNodeTypeIfNeeded(
+ const CommonFunctionInfo & func_info,
+ const DB::ActionsDAG::Node * func_node,
+ DB::ActionsDAGPtr & actions_dag,
+ bool with_nullability) const override
+ {
+ /// result is nullable.
+ /// if result is NaN, convert it to NULL.
+ auto is_nan_func_node = toFunctionNode(actions_dag, "isNaN",
getUniqueName("isNaN"), {func_node});
+ auto null_type = DB::makeNullable(func_node->result_type);
+ auto nullable_col = null_type->createColumn();
+ nullable_col->insertDefault();
+ const auto * null_node
+ =
&actions_dag->addColumn(DB::ColumnWithTypeAndName(std::move(nullable_col),
null_type, getUniqueName("null")));
+ DB::ActionsDAG::NodeRawConstPtrs convert_nan_func_args =
{is_nan_func_node, null_node, func_node};
+
+ func_node = toFunctionNode(actions_dag, "if", func_node->result_name,
convert_nan_func_args);
+ actions_dag->addOrReplaceInOutputs(*func_node);
+ return func_node;
+ }
+};
+
+static const
AggregateFunctionParserRegister<AggregateFunctionParserStddev<StddevNameStruct>>
registerer_stddev;
+static const
AggregateFunctionParserRegister<AggregateFunctionParserStddev<StddevSampNameStruct>>
registerer_stddev_samp;
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]