This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new c94cde4ff [GLUTEN-5896][CH]Fix greatest diff #5920
c94cde4ff is described below

commit c94cde4ffd479de8a1b09cbebaa46f9344cdb2e6
Author: KevinyhZou <[email protected]>
AuthorDate: Fri May 31 17:41:38 2024 +0800

    [GLUTEN-5896][CH]Fix greatest diff #5920
    
    What changes were proposed in this pull request?
    (Please fill in changes proposed in this fix)
    
    (Fixes: #5896)
    
    How was this patch tested?
    TEST BY UT
---
 .../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 12 ++++
 .../Functions/SparkFunctionGreatest.cpp            | 75 ++++++++++++++++++++++
 cpp-ch/local-engine/Parser/SerializedPlanParser.h  |  2 +-
 3 files changed, 88 insertions(+), 1 deletion(-)

diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index eec0ad874..748bd5a7f 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -2551,5 +2551,17 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends 
GlutenClickHouseTPCHAbstr
     compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
     spark.sql("drop table test_tbl_5096")
   }
+
+  test("GLUTEN-5896: Bug fix greatest diff") { // Regression test: greatest() over nullable int columns is compared against vanilla Spark.
+    val tbl_create_sql =
+      "create table test_tbl_5896(id bigint, x1 int, x2 int, x3 int) using 
parquet"
+    val tbl_insert_sql = // Rows cover: one NULL among values, all NULLs, a single non-NULL value, and no NULLs.
+      "insert into test_tbl_5896 values(1, 12, NULL, 13), (2, NULL, NULL, 
NULL), (3, 11, NULL, NULL), (4, 10, 9, 8)"
+    val select_sql = "select id, greatest(x1, x2, x3) from test_tbl_5896"
+    spark.sql(tbl_create_sql)
+    spark.sql(tbl_insert_sql)
+    compareResultsAgainstVanillaSpark(select_sql, true, { _ => }) // No extra checks on the resulting DataFrame (empty callback).
+    spark.sql("drop table test_tbl_5896") // Drop the table created by this test.
+  }
 }
 // scalastyle:on line.size.limit
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionGreatest.cpp 
b/cpp-ch/local-engine/Functions/SparkFunctionGreatest.cpp
new file mode 100644
index 000000000..9577d65ec
--- /dev/null
+++ b/cpp-ch/local-engine/Functions/SparkFunctionGreatest.cpp
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <Functions/LeastGreatestGeneric.h>
+#include <DataTypes/getLeastSupertype.h>
+#include <DataTypes/DataTypeNullable.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+}
+
+namespace local_engine
+{
+class SparkFunctionGreatest : public 
DB::FunctionLeastGreatestGeneric<DB::LeastGreatest::Greatest> // "sparkGreatest": row-wise greatest of its arguments, always returning a Nullable type.
+{
+public:
+    static constexpr auto name = "sparkGreatest"; // Name used in the error message below.
+    static DB::FunctionPtr create(DB::ContextPtr) { return 
std::make_shared<SparkFunctionGreatest>(); } // Factory hook; the context argument is unused.
+    SparkFunctionGreatest() = default;
+    ~SparkFunctionGreatest() override = default;
+    bool useDefaultImplementationForNulls() const override { return false; } // Opts out of the default NULL handling; presumably so executeImpl receives Nullable columns itself -- confirm against IFunction.
+
+private:
+    DB::DataTypePtr getReturnTypeImpl(const DB::DataTypes & types) const 
override
+    { // Result type is the least supertype of all argument types, wrapped in Nullable.
+        if (types.empty()) // A call with zero arguments is rejected.
+            throw 
DB::Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} 
cannot be called without arguments", name);
+        return makeNullable(getLeastSupertype(types));
+    }
+
+    DB::ColumnPtr executeImpl(const DB::ColumnsWithTypeAndName & arguments, 
const DB::DataTypePtr & result_type, size_t input_rows_count) const override
+    { // For every row, copies the value of the argument that compares greatest into a new column of result_type.
+        size_t num_arguments = arguments.size();
+        DB::Columns converted_columns(num_arguments);
+        for (size_t arg = 0; arg < num_arguments; ++arg) // Cast every argument to result_type and expand constant columns to full columns.
+            converted_columns[arg] = castColumn(arguments[arg], 
result_type)->convertToFullColumnIfConst();
+        auto result_column = result_type->createColumn();
+        result_column->reserve(input_rows_count);
+        for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
+        {
+            size_t best_arg = 0; // Index of the current winner; argument 0 is the initial candidate.
+            for (size_t arg = 1; arg < num_arguments; ++arg)
+            {
+                auto cmp_result = converted_columns[arg]->compareAt(row_num, 
row_num, *converted_columns[best_arg], -1); // NOTE(review): hint -1 presumably ranks NULL/NaN lowest so NULL arguments lose -- confirm against IColumn::compareAt; Spark's NaN ordering may differ.
+                if (cmp_result > 0) // Strictly greater only, so on ties the earliest argument is kept.
+                    best_arg = arg;
+            }
+            result_column->insertFrom(*converted_columns[best_arg], row_num); // Copy the winning argument's value for this row.
+        }
+        return result_column;
+    }
+};
+
+REGISTER_FUNCTION(SparkGreatest)
+{
+    factory.registerFunction<SparkFunctionGreatest>(); // Registers SparkFunctionGreatest with the function factory (presumably under its static name "sparkGreatest" -- confirm).
+}
+}
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h 
b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
index a636ebb93..73448b069 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
@@ -104,7 +104,7 @@ static const std::map<std::string, std::string> 
SCALAR_FUNCTIONS
        {"hypot", "hypot"},
        {"sign", "sign"},
        {"radians", "radians"},
-       {"greatest", "greatest"},
+       {"greatest", "sparkGreatest"},
        {"least", "least"},
        {"shiftleft", "bitShiftLeft"},
        {"shiftright", "bitShiftRight"},


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to