This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 31c384f0d [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608)
(#6023)
31c384f0d is described below
commit 31c384f0d124293c28eea787260d6566ac51f9a4
Author: Kyligence Git <[email protected]>
AuthorDate: Sat Jun 8 08:31:50 2024 -0500
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608) (#6023)
* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608)
* Fix Build due to https://github.com/ClickHouse/ClickHouse/pull/64798
* Fix UT
---------
Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
---
cpp-ch/clickhouse.version | 4 +-
cpp-ch/local-engine/Functions/SparkFunctionFloor.h | 69 ++++++++++++++-----
.../Functions/SparkFunctionRoundHalfUp.h | 77 +++++++++++-----------
.../tests/gtest_parquet_columnindex.cpp | 2 +-
4 files changed, 91 insertions(+), 61 deletions(-)
diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index d98ce9f88..a5ca8d8dd 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20240606
-CH_COMMIT=fed1c01e169
+CH_BRANCH=rebase_ch/20240608
+CH_COMMIT=b5050282335
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionFloor.h
b/cpp-ch/local-engine/Functions/SparkFunctionFloor.h
index b016c9afa..ce33d11db 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionFloor.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionFloor.h
@@ -14,13 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include <Functions/FunctionsRound.h>
-#include <Functions/FunctionFactory.h>
+#pragma once
+
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnVector.h>
-#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypesNumber.h>
-#include <bit>
+#include <DataTypes/IDataType.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionsRound.h>
using namespace DB;
@@ -130,20 +131,29 @@ struct SparkFloatFloorImpl
{
private:
static_assert(!is_decimal<T>);
- using Op = FloatRoundingComputation<T, RoundingMode::Floor, scale_mode>;
- using Data = std::array<T, Op::data_count>;
+ template <
+ Vectorize vectorize =
+#ifdef __SSE4_1__
+ Vectorize::Yes
+#else
+ Vectorize::No
+#endif
+ >
+ using Op = FloatRoundingComputation<T, RoundingMode::Floor, scale_mode,
vectorize>;
+ using Data = std::array<T, Op<>::data_count>;
+
public:
static void apply(const PaddedPODArray<T> & in, size_t scale,
PaddedPODArray<T> & out, PaddedPODArray<UInt8> & null_map)
{
- auto mm_scale = Op::prepare(scale);
+ auto mm_scale = Op<>::prepare(scale);
const size_t data_count = std::tuple_size<Data>();
- const T* end_in = in.data() + in.size();
- const T* limit = in.data() + in.size() / data_count * data_count;
- const T* __restrict p_in = in.data();
- T* __restrict p_out = out.data();
+ const T * end_in = in.data() + in.size();
+ const T * limit = in.data() + in.size() / data_count * data_count;
+ const T * __restrict p_in = in.data();
+ T * __restrict p_out = out.data();
while (p_in < limit)
{
- Op::compute(p_in, mm_scale, p_out);
+ Op<>::compute(p_in, mm_scale, p_out);
p_in += data_count;
p_out += data_count;
}
@@ -154,7 +164,7 @@ public:
Data tmp_dst;
size_t tail_size_bytes = (end_in - p_in) * sizeof(*p_in);
memcpy(&tmp_src, p_in, tail_size_bytes);
- Op::compute(reinterpret_cast<T *>(&tmp_src), mm_scale,
reinterpret_cast<T *>(&tmp_dst));
+ Op<>::compute(reinterpret_cast<T *>(&tmp_src), mm_scale,
reinterpret_cast<T *>(&tmp_dst));
memcpy(p_out, &tmp_dst, tail_size_bytes);
}
@@ -171,11 +181,31 @@ public:
checkAndSetNullable(out[i], null_map[i]);
}
}
-
};
class SparkFunctionFloor : public DB::FunctionFloor
{
+ static Scale getScaleArg(const ColumnsWithTypeAndName & arguments)
+ {
+ if (arguments.size() == 2)
+ {
+ const IColumn & scale_column = *arguments[1].column;
+ if (!isColumnConst(scale_column))
+ throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument
for rounding functions must be constant");
+
+ Field scale_field = assert_cast<const ColumnConst
&>(scale_column).getField();
+ if (scale_field.getType() != Field::Types::UInt64 &&
scale_field.getType() != Field::Types::Int64)
+ throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument
for rounding functions must have integer type");
+
+ Int64 scale64 = scale_field.get<Int64>();
+ if (scale64 > std::numeric_limits<Scale>::max() || scale64 <
std::numeric_limits<Scale>::min())
+ throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale
argument for rounding function is too large");
+
+ return scale64;
+ }
+ return 0;
+ }
+
public:
static constexpr auto name = "sparkFloor";
static DB::FunctionPtr create(DB::ContextPtr) { return
std::make_shared<SparkFunctionFloor>(); }
@@ -183,17 +213,20 @@ public:
~SparkFunctionFloor() override = default;
String getName() const override { return name; }
- DB::DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const
override
+ ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return
{1}; }
+
+ DB::DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &
arguments) const override
{
auto result_type = DB::FunctionFloor::getReturnTypeImpl(arguments);
return makeNullable(result_type);
}
- DB::ColumnPtr executeImpl(const DB::ColumnsWithTypeAndName & arguments,
const DB::DataTypePtr & result_type, size_t input_rows) const override
+ DB::ColumnPtr
+ executeImpl(const DB::ColumnsWithTypeAndName & arguments, const
DB::DataTypePtr & result_type, size_t input_rows) const override
{
const ColumnWithTypeAndName & first_arg = arguments[0];
Scale scale_arg = getScaleArg(arguments);
- switch(first_arg.type->getTypeId())
+ switch (first_arg.type->getTypeId())
{
case TypeIndex::Float32:
return executeInternal<Float32>(first_arg.column, scale_arg);
@@ -206,7 +239,7 @@ public:
}
}
- template<typename T>
+ template <typename T>
static ColumnPtr executeInternal(const ColumnPtr & col_arg, const Scale &
scale_arg)
{
const auto * col = checkAndGetColumn<ColumnVector<T>>(col_arg.get());
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h
b/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h
index 47135aabd..441842d4e 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h
@@ -18,6 +18,11 @@
#include <Functions/FunctionsRound.h>
+namespace DB::ErrorCodes
+{
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
namespace local_engine
{
using namespace DB;
@@ -35,10 +40,11 @@ public:
static VectorType load(const ScalarType * in) { return _mm_loadu_ps(in); }
static VectorType load1(const ScalarType in) { return _mm_load1_ps(&in); }
- static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out,
val);}
+ static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out,
val); }
static VectorType multiply(VectorType val, VectorType scale) { return
_mm_mul_ps(val, scale); }
static VectorType divide(VectorType val, VectorType scale) { return
_mm_div_ps(val, scale); }
- template <RoundingMode mode> static VectorType apply(VectorType val)
+ template <RoundingMode mode>
+ static VectorType apply(VectorType val)
{
ScalarType tempFloatsIn[data_count];
ScalarType tempFloatsOut[data_count];
@@ -49,10 +55,7 @@ public:
return load(tempFloatsOut);
}
- static VectorType prepare(size_t scale)
- {
- return load1(scale);
- }
+ static VectorType prepare(size_t scale) { return load1(scale); }
};
template <>
@@ -65,10 +68,11 @@ public:
static VectorType load(const ScalarType * in) { return _mm_loadu_pd(in); }
static VectorType load1(const ScalarType in) { return _mm_load1_pd(&in); }
- static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out,
val);}
+ static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out,
val); }
static VectorType multiply(VectorType val, VectorType scale) { return
_mm_mul_pd(val, scale); }
static VectorType divide(VectorType val, VectorType scale) { return
_mm_div_pd(val, scale); }
- template <RoundingMode mode> static VectorType apply(VectorType val)
+ template <RoundingMode mode>
+ static VectorType apply(VectorType val)
{
ScalarType tempFloatsIn[data_count];
ScalarType tempFloatsOut[data_count];
@@ -79,10 +83,7 @@ public:
return load(tempFloatsOut);
}
- static VectorType prepare(size_t scale)
- {
- return load1(scale);
- }
+ static VectorType prepare(size_t scale) { return load1(scale); }
};
@@ -135,11 +136,11 @@ public:
const size_t data_count = std::tuple_size<Data>();
- const T* end_in = in.data() + in.size();
- const T* limit = in.data() + in.size() / data_count * data_count;
+ const T * end_in = in.data() + in.size();
+ const T * limit = in.data() + in.size() / data_count * data_count;
- const T* __restrict p_in = in.data();
- T* __restrict p_out = out.data();
+ const T * __restrict p_in = in.data();
+ T * __restrict p_out = out.data();
while (p_in < limit)
{
@@ -169,9 +170,10 @@ template <typename T, RoundingMode rounding_mode,
TieBreakingMode tie_breaking_m
struct DispatcherRoundingHalfUp
{
template <ScaleMode scale_mode>
- using FunctionRoundingImpl =
std::conditional_t<std::is_floating_point_v<T>,
- FloatRoundingHalfUpImpl<T,
rounding_mode, scale_mode>,
- IntegerRoundingImpl<T,
rounding_mode, scale_mode, tie_breaking_mode>>;
+ using FunctionRoundingImpl = std::conditional_t<
+ std::is_floating_point_v<T>,
+ FloatRoundingHalfUpImpl<T, rounding_mode, scale_mode>,
+ IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;
static ColumnPtr apply(const IColumn * col_general, Scale scale_arg)
{
@@ -233,10 +235,7 @@ public:
static constexpr auto name = "roundHalfUp";
static FunctionPtr create(ContextPtr) { return
std::make_shared<FunctionRoundingHalfUp>(); }
- String getName() const override
- {
- return name;
- }
+ String getName() const override { return name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
@@ -246,14 +245,16 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if ((arguments.empty()) || (arguments.size() > 2))
- throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
- "Number of arguments for function {} doesn't
match: passed {}, should be 1 or 2.",
- getName(), arguments.size());
+ throw Exception(
+ ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+ "Number of arguments for function {} doesn't match: passed {},
should be 1 or 2.",
+ getName(),
+ arguments.size());
for (const auto & type : arguments)
if (!isNumber(type))
- throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal
type {} of argument of function {}",
- arguments[0]->getName(), getName());
+ throw Exception(
+ ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of
argument of function {}", arguments[0]->getName(), getName());
return arguments[0];
}
@@ -267,13 +268,11 @@ public:
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument
for rounding functions must be constant");
Field scale_field = assert_cast<const ColumnConst
&>(scale_column).getField();
- if (scale_field.getType() != Field::Types::UInt64
- && scale_field.getType() != Field::Types::Int64)
+ if (scale_field.getType() != Field::Types::UInt64 &&
scale_field.getType() != Field::Types::Int64)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument
for rounding functions must have integer type");
Int64 scale64 = scale_field.get<Int64>();
- if (scale64 > std::numeric_limits<Scale>::max()
- || scale64 < std::numeric_limits<Scale>::min())
+ if (scale64 > std::numeric_limits<Scale>::max() || scale64 <
std::numeric_limits<Scale>::min())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale
argument for rounding function is too large");
return scale64;
@@ -305,26 +304,24 @@ public:
};
if (!callOnIndexAndDataType<void>(column.type->getTypeId(), call))
- {
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of
argument of function {}", column.name, getName());
- }
return res;
}
- bool hasInformationAboutMonotonicity() const override
- {
- return true;
- }
+ bool hasInformationAboutMonotonicity() const override { return true; }
Monotonicity getMonotonicityForRange(const IDataType &, const Field &,
const Field &) const override
{
- return { .is_monotonic = true, .is_always_monotonic = true };
+ return {.is_monotonic = true, .is_always_monotonic = true};
}
};
-struct NameRoundHalfUp { static constexpr auto name = "roundHalfUp"; };
+struct NameRoundHalfUp
+{
+ static constexpr auto name = "roundHalfUp";
+};
using FunctionRoundHalfUp = FunctionRoundingHalfUp<NameRoundHalfUp,
RoundingMode::Round, TieBreakingMode::Auto>;
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
index 532244029..bdaa51f97 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
@@ -604,7 +604,7 @@ TEST(ColumnIndex, DecimalField)
ASSERT_EQ(actual, expected);
- /// Eexception test, only in relase release node
+ /// Exception test, only in release mode
#ifdef NDEBUG
Field unsupport = DecimalField<Decimal256>(Int256(300000000), 4);
EXPECT_THROW(to_parquet.as(unsupport, desc), DB::Exception);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]