This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new ae6a7795cb [GLUTEN-7455][CH] Add spark_modulo for compatibility (#7619)
ae6a7795cb is described below
commit ae6a7795cb4823e98c53c42149e6ae752323d2da
Author: lgbo <[email protected]>
AuthorDate: Mon Oct 21 17:55:40 2024 +0800
[GLUTEN-7455][CH] Add spark_modulo for compatibility (#7619)
---
.../execution/GlutenFunctionValidateSuite.scala | 27 +++
cpp-ch/local-engine/CMakeLists.txt | 3 +-
.../Functions/SparkFunctionBinaryArithmetic.cpp | 207 +++++++++++++++++++++
.../Parser/scalar_function_parser/arithmetic.cpp | 2 +-
4 files changed, 237 insertions(+), 2 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
index 27fd7b5043..04d95907bd 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
@@ -828,4 +828,31 @@ class GlutenFunctionValidateSuite extends
GlutenClickHouseWholeStageTransformerS
""".stripMargin
runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
}
+
+ test("GLUTEN-7455 negative modulo") {
+ withTable("test_7455") {
+ spark.sql("create table test_7455(x bigint) using parquet")
+ val insert_sql =
+ """
+ |insert into test_7455 values
+ |(327696126396414574)
+ |,(618162455457852376)
+ |,(-1)
+ |,(-2)
+ |""".stripMargin
+ spark.sql(insert_sql)
+ val sql =
+ """
+ |select x,
+ |x % 4294967296,
+ |x % -4294967296,
+ |x % 4294967295,
+ |x % -4294967295,
+ |x % 100,
+ |x % -100
+ |from test_7455
+ |""".stripMargin
+ compareResultsAgainstVanillaSpark(sql, true, { _ => })
+ }
+ }
}
diff --git a/cpp-ch/local-engine/CMakeLists.txt
b/cpp-ch/local-engine/CMakeLists.txt
index d145ed339f..03e17ec311 100644
--- a/cpp-ch/local-engine/CMakeLists.txt
+++ b/cpp-ch/local-engine/CMakeLists.txt
@@ -81,7 +81,8 @@ include_directories(
${ClickHouse_SOURCE_DIR}/contrib/azure/sdk/storage/azure-storage-common/inc
${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include
${ClickHouse_SOURCE_DIR}/contrib/llvm-project/utils/bazel/llvm-project-overlay/llvm/include
-)
+ ${ClickHouse_SOURCE_DIR}/contrib/libdivide
+ ${ClickHouse_SOURCE_DIR}/contrib/libdivide-cmake)
add_subdirectory(Storages/Parquet)
add_subdirectory(Storages/SubstraitSource)
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionBinaryArithmetic.cpp
b/cpp-ch/local-engine/Functions/SparkFunctionBinaryArithmetic.cpp
new file mode 100644
index 0000000000..8effd37e2f
--- /dev/null
+++ b/cpp-ch/local-engine/Functions/SparkFunctionBinaryArithmetic.cpp
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <Functions/FunctionBinaryArithmetic.h>
+#include <Functions/FunctionFactory.h>
+
+#include <libdivide.h>
+#include <libdivide-config.h>
+
+#include <Poco/Logger.h>
+#include <Common/logger_useful.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int ILLEGAL_DIVISION;
+}
+}
+namespace local_engine
+{
+/// Optimizations for integer modulo by a constant.
+
+template <typename A, typename B>
+struct ModuloByConstantImpl : DB::BinaryOperation<A, B, DB::ModuloImpl<A, B>>
+{
+ using Op = DB::ModuloImpl<A, B>;
+ using ResultType = typename Op::ResultType;
+ static const constexpr bool allow_fixed_string = false;
+ static const constexpr bool allow_string_integer = false;
+
+ template <DB::OpCase op_case>
+ static void NO_INLINE
+ process(const A * __restrict a, const B * __restrict b, ResultType *
__restrict c, size_t size, const DB::NullMap * right_nullmap)
+ {
+ if constexpr (op_case == DB::OpCase::RightConstant)
+ {
+ if (right_nullmap && (*right_nullmap)[0])
+ return;
+ vectorConstant(a, *b, c, size);
+ }
+ else
+ {
+ if (right_nullmap)
+ {
+ for (size_t i = 0; i < size; ++i)
+ if ((*right_nullmap)[i])
+ c[i] = ResultType();
+ else
+ apply<op_case>(a, b, c, i);
+ }
+ else
+ for (size_t i = 0; i < size; ++i)
+ apply<op_case>(a, b, c, i);
+ }
+ }
+
+ static ResultType process(A a, B b) { return Op::template
apply<ResultType>(a, b); }
+
+ static void NO_INLINE NO_SANITIZE_UNDEFINED vectorConstant(const A *
__restrict src, B b, ResultType * __restrict dst, size_t size)
+ {
+ /// Modulo with too small divisor.
+ if (unlikely((std::is_signed_v<B> && b == -1) || b == 1))
+ {
+ for (size_t i = 0; i < size; ++i)
+ dst[i] = 0;
+ return;
+ }
+
+ /// Modulo with too large divisor.
+ if (unlikely(
+ b > std::numeric_limits<A>::max() || (std::is_signed_v<A> &&
std::is_signed_v<B> && b < std::numeric_limits<A>::lowest())))
+ {
+ for (size_t i = 0; i < size; ++i)
+ dst[i] = static_cast<ResultType>(src[i]);
+ return;
+ }
+
+ if (unlikely(static_cast<A>(b) == 0))
+ throw DB::Exception(DB::ErrorCodes::ILLEGAL_DIVISION, "Division by
zero");
+
+ /// Division by min negative value.
+ if (std::is_signed_v<B> && b == std::numeric_limits<B>::lowest())
+ throw DB::Exception(DB::ErrorCodes::ILLEGAL_DIVISION, "Division by
the most negative number");
+
+ /// Modulo of division by negative number is the same as the positive
number.
+ if (b < 0)
+ b = -b;
+
+ /// Here we failed to make the SSE variant from libdivide give an
advantage.
+ libdivide::divider<A> divider(static_cast<A>(b));
+ for (size_t i = 0; i < size; ++i)
+ {
+ /// NOTE: perhaps, the division semantics with the remainder of
negative numbers is not preserved.
+ dst[i] = static_cast<ResultType>(src[i] - (src[i] / divider) * b);
+ }
+ }
+
+private:
+ template <DB::OpCase op_case>
+ static void apply(const A * __restrict a, const B * __restrict b,
ResultType * __restrict c, size_t i)
+ {
+ if constexpr (op_case == DB::OpCase::Vector)
+ c[i] = Op::template apply<ResultType>(a[i], b[i]);
+ else
+ c[i] = Op::template apply<ResultType>(*a, b[i]);
+ }
+};
+}
+
+namespace DB
+{
+namespace impl_
+{
+template <>
+struct BinaryOperationImpl<UInt64, UInt8, ModuloImpl<UInt64, UInt8>> :
local_engine::ModuloByConstantImpl<UInt64, UInt8>
+{
+};
+template <>
+struct BinaryOperationImpl<UInt64, UInt16, ModuloImpl<UInt64, UInt16>> :
local_engine::ModuloByConstantImpl<UInt64, UInt16>
+{
+};
+template <>
+struct BinaryOperationImpl<UInt64, UInt32, ModuloImpl<UInt64, UInt32>> :
local_engine::ModuloByConstantImpl<UInt64, UInt32>
+{
+};
+template <>
+struct BinaryOperationImpl<UInt64, UInt64, ModuloImpl<UInt64, UInt64>> :
local_engine::ModuloByConstantImpl<UInt64, UInt64>
+{
+};
+
+template <>
+struct BinaryOperationImpl<UInt32, UInt8, ModuloImpl<UInt32, UInt8>> :
local_engine::ModuloByConstantImpl<UInt32, UInt8>
+{
+};
+template <>
+struct BinaryOperationImpl<UInt32, UInt16, ModuloImpl<UInt32, UInt16>> :
local_engine::ModuloByConstantImpl<UInt32, UInt16>
+{
+};
+template <>
+struct BinaryOperationImpl<UInt32, UInt32, ModuloImpl<UInt32, UInt32>> :
local_engine::ModuloByConstantImpl<UInt32, UInt32>
+{
+};
+template <>
+struct BinaryOperationImpl<UInt32, UInt64, ModuloImpl<UInt32, UInt64>> :
local_engine::ModuloByConstantImpl<UInt32, UInt64>
+{
+};
+
+template <>
+struct BinaryOperationImpl<Int64, Int8, ModuloImpl<Int64, Int8>> :
local_engine::ModuloByConstantImpl<Int64, Int8>
+{
+};
+template <>
+struct BinaryOperationImpl<Int64, Int16, ModuloImpl<Int64, Int16>> :
local_engine::ModuloByConstantImpl<Int64, Int16>
+{
+};
+template <>
+struct BinaryOperationImpl<Int64, Int32, ModuloImpl<Int64, Int32>> :
local_engine::ModuloByConstantImpl<Int64, Int32>
+{
+};
+template <>
+struct BinaryOperationImpl<Int64, Int64, ModuloImpl<Int64, Int64>> :
local_engine::ModuloByConstantImpl<Int64, Int64>
+{
+};
+
+template <>
+struct BinaryOperationImpl<Int32, Int8, ModuloImpl<Int32, Int8>> :
local_engine::ModuloByConstantImpl<Int32, Int8>
+{
+};
+template <>
+struct BinaryOperationImpl<Int32, Int16, ModuloImpl<Int32, Int16>> :
local_engine::ModuloByConstantImpl<Int32, Int16>
+{
+};
+template <>
+struct BinaryOperationImpl<Int32, Int32, ModuloImpl<Int32, Int32>> :
local_engine::ModuloByConstantImpl<Int32, Int32>
+{
+};
+template <>
+struct BinaryOperationImpl<Int32, Int64, ModuloImpl<Int32, Int64>> :
local_engine::ModuloByConstantImpl<Int32, Int64>
+{
+};
+}
+
+struct SparkNameModulo
+{
+ static constexpr auto name = "spark_modulo";
+};
+using SparkFunctionModulo = DB::BinaryArithmeticOverloadResolver<ModuloImpl,
SparkNameModulo, false>;
+
+REGISTER_FUNCTION(SparkModulo)
+{
+ factory.registerFunction<SparkFunctionModulo>();
+}
+}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/arithmetic.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/arithmetic.cpp
index 06c0552968..7268c731c4 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/arithmetic.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/arithmetic.cpp
@@ -321,7 +321,7 @@ protected:
return toFunctionNode(actions_dag, function_name, {left_arg,
right_arg, type_node});
}
- return toFunctionNode(actions_dag, "modulo", {left_arg, right_arg});
+ return toFunctionNode(actions_dag, "spark_modulo", {left_arg,
right_arg});
}
};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]