(incubator-gluten) branch main updated: [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608) (#6023)

changchen Sat, 08 Jun 2024 06:31:59 -0700

This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new 31c384f0d [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608) (#6023)
31c384f0d is described below

commit 31c384f0d124293c28eea787260d6566ac51f9a4
Author: Kyligence Git <[email protected]>
AuthorDate: Sat Jun 8 08:31:50 2024 -0500

    [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608) (#6023)
    
    * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608)
    
    * Fix Build due to https://github.com/ClickHouse/ClickHouse/pull/64798
    
    * Fix UT
    
    ---------
    
    Co-authored-by: kyligence-git <[email protected]>
    Co-authored-by: Chang Chen <[email protected]>
---
 cpp-ch/clickhouse.version                          |  4 +-
 cpp-ch/local-engine/Functions/SparkFunctionFloor.h | 69 ++++++++++++++-----
 .../Functions/SparkFunctionRoundHalfUp.h           | 77 +++++++++++-----------
 .../tests/gtest_parquet_columnindex.cpp            |  2 +-
 4 files changed, 91 insertions(+), 61 deletions(-)

diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index d98ce9f88..a5ca8d8dd 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20240606
-CH_COMMIT=fed1c01e169
+CH_BRANCH=rebase_ch/20240608
+CH_COMMIT=b5050282335
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionFloor.h 
b/cpp-ch/local-engine/Functions/SparkFunctionFloor.h
index b016c9afa..ce33d11db 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionFloor.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionFloor.h
@@ -14,13 +14,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <Functions/FunctionsRound.h>
-#include <Functions/FunctionFactory.h>
+#pragma once
+
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnVector.h>
-#include <DataTypes/IDataType.h>
 #include <DataTypes/DataTypesNumber.h>
-#include <bit>
+#include <DataTypes/IDataType.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionsRound.h>
 
 using namespace DB;
 
@@ -130,20 +131,29 @@ struct SparkFloatFloorImpl
 {
 private:
     static_assert(!is_decimal<T>);
-    using Op = FloatRoundingComputation<T, RoundingMode::Floor, scale_mode>;
-    using Data = std::array<T, Op::data_count>;
+    template <
+        Vectorize vectorize =
+#ifdef __SSE4_1__
+            Vectorize::Yes
+#else
+            Vectorize::No
+#endif
+        >
+    using Op = FloatRoundingComputation<T, RoundingMode::Floor, scale_mode, 
vectorize>;
+    using Data = std::array<T, Op<>::data_count>;
+
 public:
     static void apply(const PaddedPODArray<T> & in, size_t scale, 
PaddedPODArray<T> & out, PaddedPODArray<UInt8> & null_map)
     {
-        auto mm_scale = Op::prepare(scale);
+        auto mm_scale = Op<>::prepare(scale);
         const size_t data_count = std::tuple_size<Data>();
-        const T* end_in = in.data() + in.size();
-        const T* limit = in.data() + in.size() / data_count * data_count;
-        const T* __restrict p_in = in.data();
-        T* __restrict p_out = out.data();
+        const T * end_in = in.data() + in.size();
+        const T * limit = in.data() + in.size() / data_count * data_count;
+        const T * __restrict p_in = in.data();
+        T * __restrict p_out = out.data();
         while (p_in < limit)
         {
-            Op::compute(p_in, mm_scale, p_out);
+            Op<>::compute(p_in, mm_scale, p_out);
             p_in += data_count;
             p_out += data_count;
         }
@@ -154,7 +164,7 @@ public:
             Data tmp_dst;
             size_t tail_size_bytes = (end_in - p_in) * sizeof(*p_in);
             memcpy(&tmp_src, p_in, tail_size_bytes);
-            Op::compute(reinterpret_cast<T *>(&tmp_src), mm_scale, 
reinterpret_cast<T *>(&tmp_dst));
+            Op<>::compute(reinterpret_cast<T *>(&tmp_src), mm_scale, 
reinterpret_cast<T *>(&tmp_dst));
             memcpy(p_out, &tmp_dst, tail_size_bytes);
         }
 
@@ -171,11 +181,31 @@ public:
                 checkAndSetNullable(out[i], null_map[i]);
         }
     }
-
 };
 
 class SparkFunctionFloor : public DB::FunctionFloor
 {
+    static Scale getScaleArg(const ColumnsWithTypeAndName & arguments)
+    {
+        if (arguments.size() == 2)
+        {
+            const IColumn & scale_column = *arguments[1].column;
+            if (!isColumnConst(scale_column))
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument 
for rounding functions must be constant");
+
+            Field scale_field = assert_cast<const ColumnConst 
&>(scale_column).getField();
+            if (scale_field.getType() != Field::Types::UInt64 && 
scale_field.getType() != Field::Types::Int64)
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument 
for rounding functions must have integer type");
+
+            Int64 scale64 = scale_field.get<Int64>();
+            if (scale64 > std::numeric_limits<Scale>::max() || scale64 < 
std::numeric_limits<Scale>::min())
+                throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale 
argument for rounding function is too large");
+
+            return scale64;
+        }
+        return 0;
+    }
+
 public:
     static constexpr auto name = "sparkFloor";
     static DB::FunctionPtr create(DB::ContextPtr) { return 
std::make_shared<SparkFunctionFloor>(); }
@@ -183,17 +213,20 @@ public:
     ~SparkFunctionFloor() override = default;
     String getName() const override { return name; }
 
-    DB::DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const 
override
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return 
{1}; }
+
+    DB::DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & 
arguments) const override
     {
         auto result_type = DB::FunctionFloor::getReturnTypeImpl(arguments);
         return makeNullable(result_type);
     }
 
-    DB::ColumnPtr executeImpl(const DB::ColumnsWithTypeAndName & arguments, 
const DB::DataTypePtr & result_type, size_t input_rows) const override
+    DB::ColumnPtr
+    executeImpl(const DB::ColumnsWithTypeAndName & arguments, const 
DB::DataTypePtr & result_type, size_t input_rows) const override
     {
         const ColumnWithTypeAndName & first_arg = arguments[0];
         Scale scale_arg = getScaleArg(arguments);
-        switch(first_arg.type->getTypeId())
+        switch (first_arg.type->getTypeId())
         {
             case TypeIndex::Float32:
                 return executeInternal<Float32>(first_arg.column, scale_arg);
@@ -206,7 +239,7 @@ public:
         }
     }
 
-    template<typename T>
+    template <typename T>
     static ColumnPtr executeInternal(const ColumnPtr & col_arg, const Scale & 
scale_arg)
     {
         const auto * col = checkAndGetColumn<ColumnVector<T>>(col_arg.get());
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h 
b/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h
index 47135aabd..441842d4e 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h
@@ -18,6 +18,11 @@
 
 #include <Functions/FunctionsRound.h>
 
+namespace DB::ErrorCodes
+{
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
 namespace local_engine
 {
 using namespace DB;
@@ -35,10 +40,11 @@ public:
 
     static VectorType load(const ScalarType * in) { return _mm_loadu_ps(in); }
     static VectorType load1(const ScalarType in) { return _mm_load1_ps(&in); }
-    static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out, 
val);}
+    static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out, 
val); }
     static VectorType multiply(VectorType val, VectorType scale) { return 
_mm_mul_ps(val, scale); }
     static VectorType divide(VectorType val, VectorType scale) { return 
_mm_div_ps(val, scale); }
-    template <RoundingMode mode> static VectorType apply(VectorType val)
+    template <RoundingMode mode>
+    static VectorType apply(VectorType val)
     {
         ScalarType tempFloatsIn[data_count];
         ScalarType tempFloatsOut[data_count];
@@ -49,10 +55,7 @@ public:
         return load(tempFloatsOut);
     }
 
-    static VectorType prepare(size_t scale)
-    {
-        return load1(scale);
-    }
+    static VectorType prepare(size_t scale) { return load1(scale); }
 };
 
 template <>
@@ -65,10 +68,11 @@ public:
 
     static VectorType load(const ScalarType * in) { return _mm_loadu_pd(in); }
     static VectorType load1(const ScalarType in) { return _mm_load1_pd(&in); }
-    static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out, 
val);}
+    static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out, 
val); }
     static VectorType multiply(VectorType val, VectorType scale) { return 
_mm_mul_pd(val, scale); }
     static VectorType divide(VectorType val, VectorType scale) { return 
_mm_div_pd(val, scale); }
-    template <RoundingMode mode> static VectorType apply(VectorType val)
+    template <RoundingMode mode>
+    static VectorType apply(VectorType val)
     {
         ScalarType tempFloatsIn[data_count];
         ScalarType tempFloatsOut[data_count];
@@ -79,10 +83,7 @@ public:
         return load(tempFloatsOut);
     }
 
-    static VectorType prepare(size_t scale)
-    {
-        return load1(scale);
-    }
+    static VectorType prepare(size_t scale) { return load1(scale); }
 };
 
 
@@ -135,11 +136,11 @@ public:
 
         const size_t data_count = std::tuple_size<Data>();
 
-        const T* end_in = in.data() + in.size();
-        const T* limit = in.data() + in.size() / data_count * data_count;
+        const T * end_in = in.data() + in.size();
+        const T * limit = in.data() + in.size() / data_count * data_count;
 
-        const T* __restrict p_in = in.data();
-        T* __restrict p_out = out.data();
+        const T * __restrict p_in = in.data();
+        T * __restrict p_out = out.data();
 
         while (p_in < limit)
         {
@@ -169,9 +170,10 @@ template <typename T, RoundingMode rounding_mode, 
TieBreakingMode tie_breaking_m
 struct DispatcherRoundingHalfUp
 {
     template <ScaleMode scale_mode>
-    using FunctionRoundingImpl = 
std::conditional_t<std::is_floating_point_v<T>,
-                                                    FloatRoundingHalfUpImpl<T, 
rounding_mode, scale_mode>,
-                                                    IntegerRoundingImpl<T, 
rounding_mode, scale_mode, tie_breaking_mode>>;
+    using FunctionRoundingImpl = std::conditional_t<
+        std::is_floating_point_v<T>,
+        FloatRoundingHalfUpImpl<T, rounding_mode, scale_mode>,
+        IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;
 
     static ColumnPtr apply(const IColumn * col_general, Scale scale_arg)
     {
@@ -233,10 +235,7 @@ public:
     static constexpr auto name = "roundHalfUp";
     static FunctionPtr create(ContextPtr) { return 
std::make_shared<FunctionRoundingHalfUp>(); }
 
-    String getName() const override
-    {
-        return name;
-    }
+    String getName() const override { return name; }
 
     bool isVariadic() const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
@@ -246,14 +245,16 @@ public:
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
         if ((arguments.empty()) || (arguments.size() > 2))
-            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-                            "Number of arguments for function {} doesn't 
match: passed {}, should be 1 or 2.",
-                            getName(), arguments.size());
+            throw Exception(
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Number of arguments for function {} doesn't match: passed {}, 
should be 1 or 2.",
+                getName(),
+                arguments.size());
 
         for (const auto & type : arguments)
             if (!isNumber(type))
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal 
type {} of argument of function {}",
-                                arguments[0]->getName(), getName());
+                throw Exception(
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 
argument of function {}", arguments[0]->getName(), getName());
 
         return arguments[0];
     }
@@ -267,13 +268,11 @@ public:
                 throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument 
for rounding functions must be constant");
 
             Field scale_field = assert_cast<const ColumnConst 
&>(scale_column).getField();
-            if (scale_field.getType() != Field::Types::UInt64
-                && scale_field.getType() != Field::Types::Int64)
+            if (scale_field.getType() != Field::Types::UInt64 && 
scale_field.getType() != Field::Types::Int64)
                 throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument 
for rounding functions must have integer type");
 
             Int64 scale64 = scale_field.get<Int64>();
-            if (scale64 > std::numeric_limits<Scale>::max()
-                || scale64 < std::numeric_limits<Scale>::min())
+            if (scale64 > std::numeric_limits<Scale>::max() || scale64 < 
std::numeric_limits<Scale>::min())
                 throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale 
argument for rounding function is too large");
 
             return scale64;
@@ -305,26 +304,24 @@ public:
         };
 
         if (!callOnIndexAndDataType<void>(column.type->getTypeId(), call))
-        {
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of 
argument of function {}", column.name, getName());
-        }
 
         return res;
     }
 
-    bool hasInformationAboutMonotonicity() const override
-    {
-        return true;
-    }
+    bool hasInformationAboutMonotonicity() const override { return true; }
 
     Monotonicity getMonotonicityForRange(const IDataType &, const Field &, 
const Field &) const override
     {
-        return { .is_monotonic = true, .is_always_monotonic = true };
+        return {.is_monotonic = true, .is_always_monotonic = true};
     }
 };
 
 
-struct NameRoundHalfUp { static constexpr auto name = "roundHalfUp"; };
+struct NameRoundHalfUp
+{
+    static constexpr auto name = "roundHalfUp";
+};
 
 using FunctionRoundHalfUp = FunctionRoundingHalfUp<NameRoundHalfUp, 
RoundingMode::Round, TieBreakingMode::Auto>;
 
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp 
b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
index 532244029..bdaa51f97 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
@@ -604,7 +604,7 @@ TEST(ColumnIndex, DecimalField)
     ASSERT_EQ(actual, expected);
 
 
-    /// Eexception test, only in relase release node
+    /// Exception test, only in release node
 #ifdef NDEBUG
     Field unsupport = DecimalField<Decimal256>(Int256(300000000), 4);
     EXPECT_THROW(to_parquet.as(unsupport, desc), DB::Exception);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240608) (#6023)

Reply via email to