This is an automated email from the ASF dual-hosted git repository.
liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new c94cde4ff [GLUTEN-5896][CH]Fix greatest diff #5920
c94cde4ff is described below
commit c94cde4ffd479de8a1b09cbebaa46f9344cdb2e6
Author: KevinyhZou <[email protected]>
AuthorDate: Fri May 31 17:41:38 2024 +0800
[GLUTEN-5896][CH]Fix greatest diff #5920
What changes were proposed in this pull request?
(Please fill in changes proposed in this fix)
(Fixes: #5896)
How was this patch tested?
TEST BY UT
---
.../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 12 ++++
.../Functions/SparkFunctionGreatest.cpp | 75 ++++++++++++++++++++++
cpp-ch/local-engine/Parser/SerializedPlanParser.h | 2 +-
3 files changed, 88 insertions(+), 1 deletion(-)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index eec0ad874..748bd5a7f 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -2551,5 +2551,17 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
spark.sql("drop table test_tbl_5096")
}
+
+ test("GLUTEN-5896: Bug fix greatest diff") {
+ val tbl_create_sql =
+ "create table test_tbl_5896(id bigint, x1 int, x2 int, x3 int) using parquet"
+ val tbl_insert_sql =
+ "insert into test_tbl_5896 values(1, 12, NULL, 13), (2, NULL, NULL, NULL), (3, 11, NULL, NULL), (4, 10, 9, 8)"
+ val select_sql = "select id, greatest(x1, x2, x3) from test_tbl_5896"
+ spark.sql(tbl_create_sql)
+ spark.sql(tbl_insert_sql)
+ compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
+ spark.sql("drop table test_tbl_5896")
+ }
}
// scalastyle:on line.size.limit
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionGreatest.cpp b/cpp-ch/local-engine/Functions/SparkFunctionGreatest.cpp
new file mode 100644
index 000000000..9577d65ec
--- /dev/null
+++ b/cpp-ch/local-engine/Functions/SparkFunctionGreatest.cpp
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <Functions/LeastGreatestGeneric.h>
+#include <DataTypes/getLeastSupertype.h>
+#include <DataTypes/DataTypeNullable.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+}
+
+namespace local_engine
+{
+class SparkFunctionGreatest : public DB::FunctionLeastGreatestGeneric<DB::LeastGreatest::Greatest>
+{
+public:
+ static constexpr auto name = "sparkGreatest";
+ static DB::FunctionPtr create(DB::ContextPtr) { return std::make_shared<SparkFunctionGreatest>(); }
+ SparkFunctionGreatest() = default;
+ ~SparkFunctionGreatest() override = default;
+ bool useDefaultImplementationForNulls() const override { return false; }
+
+private:
+ DB::DataTypePtr getReturnTypeImpl(const DB::DataTypes & types) const override
+ {
+ if (types.empty())
+ throw DB::Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} cannot be called without arguments", name);
+ return makeNullable(getLeastSupertype(types));
+ }
+
+ DB::ColumnPtr executeImpl(const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr & result_type, size_t input_rows_count) const override
+ {
+ size_t num_arguments = arguments.size();
+ DB::Columns converted_columns(num_arguments);
+ for (size_t arg = 0; arg < num_arguments; ++arg)
+ converted_columns[arg] = castColumn(arguments[arg], result_type)->convertToFullColumnIfConst();
+ auto result_column = result_type->createColumn();
+ result_column->reserve(input_rows_count);
+ for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
+ {
+ size_t best_arg = 0;
+ for (size_t arg = 1; arg < num_arguments; ++arg)
+ {
+ auto cmp_result = converted_columns[arg]->compareAt(row_num, row_num, *converted_columns[best_arg], -1);
+ if (cmp_result > 0)
+ best_arg = arg;
+ }
+ result_column->insertFrom(*converted_columns[best_arg], row_num);
+ }
+ return result_column;
+ }
+};
+
+REGISTER_FUNCTION(SparkGreatest)
+{
+ factory.registerFunction<SparkFunctionGreatest>();
+}
+}
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
index a636ebb93..73448b069 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
@@ -104,7 +104,7 @@ static const std::map<std::string, std::string> SCALAR_FUNCTIONS
{"hypot", "hypot"},
{"sign", "sign"},
{"radians", "radians"},
- {"greatest", "greatest"},
+ {"greatest", "sparkGreatest"},
{"least", "least"},
{"shiftleft", "bitShiftLeft"},
{"shiftright", "bitShiftRight"},
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]