This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 264a5dc02 [GLUTEN-5380][CH] Support bin function (#5383)
264a5dc02 is described below

commit 264a5dc02619c4c6b9ad6f51d9c7053c177fa917
Author: exmy <[email protected]>
AuthorDate: Mon Apr 15 17:47:10 2024 +0800

    [GLUTEN-5380][CH] Support bin function (#5383)
    
    What changes were proposed in this pull request?
    (Fixes: #5380)
    
    How was this patch tested?
    Pass CI
---
 .../GlutenClickHouseTPCHSaltNullParquetSuite.scala |   8 ++
 cpp-ch/local-engine/Functions/SparkFunctionBin.cpp | 131 +++++++++++++++++++++
 cpp-ch/local-engine/Parser/SerializedPlanParser.h  |   1 +
 .../utils/clickhouse/ClickHouseTestSettings.scala  |   1 -
 4 files changed, 140 insertions(+), 1 deletion(-)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 5b7109e73..27cb39584 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -1159,6 +1159,14 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
     runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
   }
 
+  test("test bin function") { // runs two bin() queries through runQueryAndCompare
+    runQueryAndCompare("select bin(id - 50) from range (100)")( // id - 50 mixes negative, zero and positive inputs (assuming range(100) yields 0..99)
+      checkGlutenOperatorMatch[ProjectExecTransformer]) // NOTE(review): presumably asserts the plan uses ProjectExecTransformer - confirm
+
+    runQueryAndCompare("select bin(n_nationkey) from nation")( // same check on the n_nationkey column of the nation table
+      checkGlutenOperatorMatch[ProjectExecTransformer])
+  }
+
   test("test 'sequence'") {
     runQueryAndCompare(
       "select sequence(id, id+10), sequence(id+10, id), sequence(id, id+10, 
3), " +
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionBin.cpp b/cpp-ch/local-engine/Functions/SparkFunctionBin.cpp
new file mode 100644
index 000000000..9aa44bf45
--- /dev/null
+++ b/cpp-ch/local-engine/Functions/SparkFunctionBin.cpp
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cmath>
+#include <Columns/ColumnString.h>
+#include <Columns/IColumn.h>
+#include <DataTypes/DataTypeString.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+
+namespace DB
+{
+namespace ErrorCodes // extern declarations only; the definitions are not in this file
+{
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT; // thrown when the argument type is neither Int64 nor Int32
+    extern const int ILLEGAL_COLUMN; // thrown when the argument column matches no supported ColumnVector type
+}
+}
+
+using namespace DB;
+namespace local_engine
+{
+namespace
+{
+    class SparkFunctionBin : public IFunction // "sparkBin": turns an Int32/Int64 column into a String column of '0'/'1' digits
+    {
+    public:
+        static constexpr auto name = "sparkBin"; // returned by getName() and shown in the error messages below
+
+        static FunctionPtr create(ContextPtr) { return 
std::make_shared<SparkFunctionBin>(); }
+
+        String getName() const override { return name; }
+
+        size_t getNumberOfArguments() const override { return 1; } // exactly one argument
+
+        bool useDefaultImplementationForConstants() const override { return 
true; }
+
+        bool isSuitableForShortCircuitArgumentsExecution(const 
DataTypesWithConstInfo & /*arguments*/) const override { return false; }
+
+        DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & 
arguments) const override
+        {
+            if (!isInt64(arguments[0].type) && !isInt32(arguments[0].type)) // reject everything except Int64 and Int32
+                throw Exception(
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Illegal type {} of argument of function {}, expected 
Int64 or Int32.",
+                    arguments[0].type->getName(),
+                    getName());
+
+            return std::make_shared<DataTypeString>(); // the result type is always String
+        }
+
+        ColumnPtr // executeImpl: dispatches on the concrete column type of the single argument
+        executeImpl(const ColumnsWithTypeAndName & arguments, const 
DataTypePtr & result_type, size_t /*input_rows_count*/) const override
+        {
+            const IColumn * col = arguments[0].column.get();
+            ColumnPtr res_column; // filled by the first tryExecute<T> that matches
+
+            if (tryExecute<Int32>(col, res_column) ||
+                tryExecute<Int64>(col, res_column))
+                return res_column;
+
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of 
argument of function {}",
+                    arguments[0].column->getName(), getName());
+        }
+
+        template <typename T>
+        bool tryExecute(const IColumn * col, ColumnPtr & col_res) const // returns false if col is not a ColumnVector<T>; otherwise fills col_res and returns true
+        {
+            const ColumnVector<T> * col_vec = 
checkAndGetColumn<ColumnVector<T>>(col);
+            if (!col_vec)
+                return false;
+
+            auto col_str = ColumnString::create();
+            ColumnString::Chars & out_chars = col_str->getChars(); // character bytes of all rows, back to back
+            ColumnString::Offsets & out_offsets = col_str->getOffsets(); // per-row end position inside out_chars
+
+            const typename ColumnVector<T>::Container & in_vec = 
col_vec->getData();
+            size_t size = in_vec.size();
+            out_offsets.resize_exact(size); // one offset per input row
+
+            size_t tot_len = 0; // first pass: total output bytes, so out_chars is sized once
+            for (size_t i = 0; i < size; ++i)
+            {
+                auto len = std::max(1, static_cast<int>(64 - 
getLeadingZeroBits(static_cast<Int64>(in_vec[i])))); // digit count, at least 1; NOTE(review): assumes getLeadingZeroBits counts leading zero bits of the 64-bit value - confirm
+                tot_len += len + 1; // digits plus one terminator byte
+            }
+            out_chars.resize_exact(tot_len);
+
+            size_t pos = 0; // second pass: write position of the current row in out_chars
+            for (size_t i = 0; i < size; ++i)
+            {
+                auto val = static_cast<Int64>(in_vec[i]); // every value is widened to Int64 before formatting
+                auto len = std::max(1, static_cast<int>(64 - 
getLeadingZeroBits(val))); // must match the length computed in the first pass
+                char * begin = reinterpret_cast<char *>(&out_chars[pos]);
+                int char_pos = len; // digits are written right to left, starting at the last slot
+                do
+                {
+                    *(begin + (--char_pos)) = (val & 1) ? '1' : '0'; // emit the lowest bit
+                    val >>= 1;
+                } while (val != 0 && char_pos > 0); // char_pos > 0 caps the loop at len digits even if val never becomes 0
+
+                pos += len + 1; // skip past the digits and the terminator slot
+                out_chars[pos - 1] = '\0'; // zero byte after the digits
+                out_offsets[i] = pos; // offset points just past this row's terminator
+            }
+
+            col_res = std::move(col_str);
+            return true;
+        }
+    };
+}
+
+REGISTER_FUNCTION(SparkFunctionBin) // registration hook for the sparkBin function
+{
+    factory.registerFunction<SparkFunctionBin>(); // adds SparkFunctionBin to the function factory
+}
+
+}
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
index cac16426e..b9a72cf05 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
@@ -116,6 +116,7 @@ static const std::map<std::string, std::string> SCALAR_FUNCTIONS
        {"check_overflow", "checkDecimalOverflowSpark"},
        {"rand", "randCanonical"},
        {"isnan", "isNaN"},
+       {"bin", "sparkBin"},
 
        /// string functions
        {"like", "like"},
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 8328723a5..05e00ca5d 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -846,7 +846,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("tanh")
     .exclude("rint")
     .exclude("expm1")
-    .exclude("bin")
     .exclude("unhex")
     .exclude("atan2")
     .exclude("round/bround/floor/ceil")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to