This is an automated email from the ASF dual-hosted git repository.
liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 264a5dc02 [GLUTEN-5380][CH] Support bin function (#5383)
264a5dc02 is described below
commit 264a5dc02619c4c6b9ad6f51d9c7053c177fa917
Author: exmy <[email protected]>
AuthorDate: Mon Apr 15 17:47:10 2024 +0800
[GLUTEN-5380][CH] Support bin function (#5383)
What changes were proposed in this pull request?
(Fixes: #5380)
How was this patch tested?
Pass CI
---
.../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 8 ++
cpp-ch/local-engine/Functions/SparkFunctionBin.cpp | 131 +++++++++++++++++++++
cpp-ch/local-engine/Parser/SerializedPlanParser.h | 1 +
.../utils/clickhouse/ClickHouseTestSettings.scala | 1 -
4 files changed, 140 insertions(+), 1 deletion(-)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 5b7109e73..27cb39584 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -1159,6 +1159,14 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends
GlutenClickHouseTPCHAbstr
runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
}
+ test("test bin function") { // Compares the results of bin() queries via runQueryAndCompare.
+ runQueryAndCompare("select bin(id - 50) from range (100)")( // id - 50 presumably spans negative, zero and positive inputs (range ids 0..99) - confirm.
+ checkGlutenOperatorMatch[ProjectExecTransformer]) // Checker passed for the resulting plan; it is parameterized with ProjectExecTransformer.
+ // Second query applies bin() to the n_nationkey column of the nation table.
+ runQueryAndCompare("select bin(n_nationkey) from nation")(
+ checkGlutenOperatorMatch[ProjectExecTransformer])
+ }
+
test("test 'sequence'") {
runQueryAndCompare(
"select sequence(id, id+10), sequence(id+10, id), sequence(id, id+10,
3), " +
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionBin.cpp
b/cpp-ch/local-engine/Functions/SparkFunctionBin.cpp
new file mode 100644
index 000000000..9aa44bf45
--- /dev/null
+++ b/cpp-ch/local-engine/Functions/SparkFunctionBin.cpp
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cmath>
+#include <Columns/ColumnString.h>
+#include <Columns/IColumn.h>
+#include <DataTypes/DataTypeString.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+
+namespace DB
+{
+namespace ErrorCodes // Forward declarations of error codes; extern means the definitions live in another translation unit.
+{
+ extern const int ILLEGAL_TYPE_OF_ARGUMENT; // Thrown by getReturnTypeImpl for an unsupported argument type.
+ extern const int ILLEGAL_COLUMN; // Thrown by executeImpl when no supported column layout matches.
+}
+}
+
+using namespace DB;
+namespace local_engine
+{
+namespace
+{
+ class SparkFunctionBin : public IFunction // Function sparkBin: renders an Int32 or Int64 argument as a string of binary digits.
+ {
+ public:
+ static constexpr auto name = "sparkBin"; // Returned by getName and therefore shown in the error messages below.
+
+ static FunctionPtr create(ContextPtr) { return
std::make_shared<SparkFunctionBin>(); } // Factory hook; the context argument is ignored.
+
+ String getName() const override { return name; }
+
+ size_t getNumberOfArguments() const override { return 1; } // Exactly one argument.
+
+ bool useDefaultImplementationForConstants() const override { return
true; } // NOTE(review): presumably lets the framework unwrap constant columns before executeImpl - confirm against IFunction.
+
+ bool isSuitableForShortCircuitArgumentsExecution(const
DataTypesWithConstInfo & /*arguments*/) const override { return false; }
+ // Accepts only Int32 or Int64 and throws ILLEGAL_TYPE_OF_ARGUMENT otherwise; the result type is always String.
+ DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &
arguments) const override
+ {
+ if (!isInt64(arguments[0].type) && !isInt32(arguments[0].type))
+ throw Exception(
+ ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+ "Illegal type {} of argument of function {}, expected
Int64 or Int32.",
+ arguments[0].type->getName(),
+ getName());
+
+ return std::make_shared<DataTypeString>();
+ }
+ // Tries the Int32 and then the Int64 column layout; throws ILLEGAL_COLUMN when neither matches.
+ ColumnPtr
+ executeImpl(const ColumnsWithTypeAndName & arguments, const
DataTypePtr & result_type, size_t /*input_rows_count*/) const override
+ { // result_type is not used in this body.
+ const IColumn * col = arguments[0].column.get();
+ ColumnPtr res_column;
+
+ if (tryExecute<Int32>(col, res_column) ||
+ tryExecute<Int64>(col, res_column))
+ return res_column;
+
+ throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of
argument of function {}",
+ arguments[0].column->getName(), getName());
+ }
+ // Returns false when col is not a ColumnVector<T>; otherwise builds the result ColumnString into col_res and returns true.
+ template <typename T>
+ bool tryExecute(const IColumn * col, ColumnPtr & col_res) const
+ {
+ const ColumnVector<T> * col_vec =
checkAndGetColumn<ColumnVector<T>>(col);
+ if (!col_vec)
+ return false;
+
+ auto col_str = ColumnString::create();
+ ColumnString::Chars & out_chars = col_str->getChars();
+ ColumnString::Offsets & out_offsets = col_str->getOffsets();
+
+ const typename ColumnVector<T>::Container & in_vec =
col_vec->getData();
+ size_t size = in_vec.size();
+ out_offsets.resize_exact(size); // One offset entry per input row.
+ // First pass: sum the bytes needed by every row so the character buffer is resized once.
+ size_t tot_len = 0;
+ for (size_t i = 0; i < size; ++i)
+ {
+ auto len = std::max(1, static_cast<int>(64 -
getLeadingZeroBits(static_cast<Int64>(in_vec[i])))); // At least one digit. NOTE(review): assumes getLeadingZeroBits counts leading zero bits of the 64-bit value - confirm.
+ tot_len += len + 1; // The extra byte is the zero terminator written in the second pass.
+ }
+ out_chars.resize_exact(tot_len);
+ // Second pass: for each row write the digits, then the terminator, then the end offset.
+ size_t pos = 0;
+ for (size_t i = 0; i < size; ++i)
+ {
+ auto val = static_cast<Int64>(in_vec[i]); // Widen to Int64 first; a negative Int32 is sign-extended by this cast.
+ auto len = std::max(1, static_cast<int>(64 -
getLeadingZeroBits(val))); // Must match the length computed in the first pass.
+ char * begin = reinterpret_cast<char *>(&out_chars[pos]);
+ int char_pos = len; // Digits are filled from the last position backwards, lowest bit first.
+ do
+ {
+ *(begin + (--char_pos)) = (val & 1) ? '1' : '0';
+ val >>= 1;
+ } while (val != 0 && char_pos > 0); // The char_pos bound ends the loop even if val never reaches zero (e.g. a negative value when >> is an arithmetic shift).
+
+ pos += len + 1; // Advance past the digits and the terminator slot.
+ out_chars[pos - 1] = '\0';
+ out_offsets[i] = pos; // Offset of row i is one past its terminator.
+ }
+
+ col_res = std::move(col_str);
+ return true;
+ }
+ };
+}
+
+REGISTER_FUNCTION(SparkFunctionBin) // NOTE(review): macro is defined elsewhere; presumably it supplies the factory object used below - confirm.
+{
+ factory.registerFunction<SparkFunctionBin>(); // Registers the class with the function factory.
+}
+
+}
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
index cac16426e..b9a72cf05 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
@@ -116,6 +116,7 @@ static const std::map<std::string, std::string>
SCALAR_FUNCTIONS
{"check_overflow", "checkDecimalOverflowSpark"},
{"rand", "randCanonical"},
{"isnan", "isNaN"},
+ {"bin", "sparkBin"},
/// string functions
{"like", "like"},
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 8328723a5..05e00ca5d 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -846,7 +846,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("tanh")
.exclude("rint")
.exclude("expm1")
- .exclude("bin")
.exclude("unhex")
.exclude("atan2")
.exclude("round/bround/floor/ceil")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]