[GitHub] [arrow] wesm commented on a change in pull request #7506: ARROW-9197: [C++] Overhaul integer/floating point casting: vectorize truncation checks, reduce binary size
wesm commented on a change in pull request #7506: URL: https://github.com/apache/arrow/pull/7506#discussion_r443696884 ## File path: cpp/src/arrow/util/int_util.cc ## @@ -461,75 +472,434 @@ Status IndexBoundsCheckImpl(const ArrayData& indices, uint64_t upper_limit) { if (indices.buffers[0]) { bitmap = indices.buffers[0]->data(); } - auto IsOutOfBounds = [&](int64_t i) -> bool { -return ( -(IsSigned && indices_data[i] < 0) || -(indices_data[i] >= 0 && static_cast(indices_data[i]) >= upper_limit)); + auto IsOutOfBounds = [&](IndexCType val) -> bool { +return ((IsSigned && val < 0) || +(val >= 0 && static_cast(val) >= upper_limit)); + }; + auto IsOutOfBoundsMaybeNull = [&](IndexCType val, bool is_valid) -> bool { +return is_valid && ((IsSigned && val < 0) || +(val >= 0 && static_cast(val) >= upper_limit)); }; OptionalBitBlockCounter indices_bit_counter(bitmap, indices.offset, indices.length); int64_t position = 0; + int64_t offset_position = indices.offset; while (position < indices.length) { BitBlockCount block = indices_bit_counter.NextBlock(); bool block_out_of_bounds = false; if (block.popcount == block.length) { // Fast path: branchless for (int64_t i = 0; i < block.length; ++i) { -block_out_of_bounds |= IsOutOfBounds(i); +block_out_of_bounds |= IsOutOfBounds(indices_data[i]); } } else if (block.popcount > 0) { // Indices have nulls, must only boundscheck non-null values - for (int64_t i = 0; i < block.length; ++i) { -if (BitUtil::GetBit(bitmap, indices.offset + position + i)) { - block_out_of_bounds |= IsOutOfBounds(i); + int64_t i = 0; + for (int64_t chunk = 0; chunk < block.length / 8; ++chunk) { +// Let the compiler unroll this +for (int j = 0; j < 8; ++j) { + block_out_of_bounds |= IsOutOfBoundsMaybeNull( + indices_data[i], BitUtil::GetBit(bitmap, offset_position + i)); + ++i; } } + for (; i < block.length; ++i) { +block_out_of_bounds |= IsOutOfBoundsMaybeNull( +indices_data[i], BitUtil::GetBit(bitmap, offset_position + i)); + } } if (ARROW_PREDICT_FALSE(block_out_of_bounds)) { if (indices.GetNullCount() > 0) { for (int64_t i = 0; i < block.length; ++i) { - if (BitUtil::GetBit(bitmap, indices.offset + position + i)) { -if (IsOutOfBounds(i)) { - return Status::IndexError("Index ", static_cast(indices_data[i]), -" out of bounds"); -} + if (IsOutOfBoundsMaybeNull(indices_data[i], + BitUtil::GetBit(bitmap, offset_position + i))) { +return Status::IndexError("Index ", FormatInt(indices_data[i]), + " out of bounds"); } } } else { for (int64_t i = 0; i < block.length; ++i) { - if (IsOutOfBounds(i)) { -return Status::IndexError("Index ", static_cast(indices_data[i]), + if (IsOutOfBounds(indices_data[i])) { +return Status::IndexError("Index ", FormatInt(indices_data[i]), " out of bounds"); } } } } indices_data += block.length; position += block.length; +offset_position += block.length; } return Status::OK(); } /// \brief Branchless boundschecking of the indices. Processes batches of /// indices at a time and shortcircuits when encountering an out-of-bounds /// index in a batch -Status IndexBoundsCheck(const ArrayData& indices, uint64_t upper_limit) { +Status CheckIndexBounds(const ArrayData& indices, uint64_t upper_limit) { switch (indices.type->id()) { case Type::INT8: - return IndexBoundsCheckImpl(indices, upper_limit); + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::INT16: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::INT32: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::INT64: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::UINT8: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::UINT16: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::UINT32: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::UINT64: + return CheckIndexBoundsImpl(indices, upper_limit); +default: + return Status::Invalid("Invalid index type for boundschecking"); + } +} + +// -- +// Utilities for casting from one integer type to another + +template +Status IntegersInRange(const Datum& datum, CType bound_lower, CType bound_upper) { + if (std::numeric_limits::lowest() >= bound_lower && +
[GitHub] [arrow] wesm commented on a change in pull request #7506: ARROW-9197: [C++] Overhaul integer/floating point casting: vectorize truncation checks, reduce binary size
wesm commented on a change in pull request #7506: URL: https://github.com/apache/arrow/pull/7506#discussion_r443689349 ## File path: cpp/src/arrow/util/int_util.cc ## @@ -461,75 +472,434 @@ Status IndexBoundsCheckImpl(const ArrayData& indices, uint64_t upper_limit) { if (indices.buffers[0]) { bitmap = indices.buffers[0]->data(); } - auto IsOutOfBounds = [&](int64_t i) -> bool { -return ( -(IsSigned && indices_data[i] < 0) || -(indices_data[i] >= 0 && static_cast(indices_data[i]) >= upper_limit)); + auto IsOutOfBounds = [&](IndexCType val) -> bool { +return ((IsSigned && val < 0) || +(val >= 0 && static_cast(val) >= upper_limit)); + }; + auto IsOutOfBoundsMaybeNull = [&](IndexCType val, bool is_valid) -> bool { +return is_valid && ((IsSigned && val < 0) || +(val >= 0 && static_cast(val) >= upper_limit)); }; OptionalBitBlockCounter indices_bit_counter(bitmap, indices.offset, indices.length); int64_t position = 0; + int64_t offset_position = indices.offset; while (position < indices.length) { BitBlockCount block = indices_bit_counter.NextBlock(); bool block_out_of_bounds = false; if (block.popcount == block.length) { // Fast path: branchless for (int64_t i = 0; i < block.length; ++i) { -block_out_of_bounds |= IsOutOfBounds(i); +block_out_of_bounds |= IsOutOfBounds(indices_data[i]); } } else if (block.popcount > 0) { // Indices have nulls, must only boundscheck non-null values - for (int64_t i = 0; i < block.length; ++i) { -if (BitUtil::GetBit(bitmap, indices.offset + position + i)) { - block_out_of_bounds |= IsOutOfBounds(i); + int64_t i = 0; + for (int64_t chunk = 0; chunk < block.length / 8; ++chunk) { +// Let the compiler unroll this +for (int j = 0; j < 8; ++j) { + block_out_of_bounds |= IsOutOfBoundsMaybeNull( + indices_data[i], BitUtil::GetBit(bitmap, offset_position + i)); + ++i; } } + for (; i < block.length; ++i) { +block_out_of_bounds |= IsOutOfBoundsMaybeNull( +indices_data[i], BitUtil::GetBit(bitmap, offset_position + i)); + } } if (ARROW_PREDICT_FALSE(block_out_of_bounds)) { if (indices.GetNullCount() > 0) { for (int64_t i = 0; i < block.length; ++i) { - if (BitUtil::GetBit(bitmap, indices.offset + position + i)) { -if (IsOutOfBounds(i)) { - return Status::IndexError("Index ", static_cast(indices_data[i]), -" out of bounds"); -} + if (IsOutOfBoundsMaybeNull(indices_data[i], + BitUtil::GetBit(bitmap, offset_position + i))) { +return Status::IndexError("Index ", FormatInt(indices_data[i]), + " out of bounds"); } } } else { for (int64_t i = 0; i < block.length; ++i) { - if (IsOutOfBounds(i)) { -return Status::IndexError("Index ", static_cast(indices_data[i]), + if (IsOutOfBounds(indices_data[i])) { +return Status::IndexError("Index ", FormatInt(indices_data[i]), " out of bounds"); } } } } indices_data += block.length; position += block.length; +offset_position += block.length; } return Status::OK(); } /// \brief Branchless boundschecking of the indices. Processes batches of /// indices at a time and shortcircuits when encountering an out-of-bounds /// index in a batch -Status IndexBoundsCheck(const ArrayData& indices, uint64_t upper_limit) { +Status CheckIndexBounds(const ArrayData& indices, uint64_t upper_limit) { switch (indices.type->id()) { case Type::INT8: - return IndexBoundsCheckImpl(indices, upper_limit); + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::INT16: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::INT32: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::INT64: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::UINT8: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::UINT16: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::UINT32: + return CheckIndexBoundsImpl(indices, upper_limit); +case Type::UINT64: + return CheckIndexBoundsImpl(indices, upper_limit); +default: + return Status::Invalid("Invalid index type for boundschecking"); + } +} + +// -- +// Utilities for casting from one integer type to another + +template +Status IntegersInRange(const Datum& datum, CType bound_lower, CType bound_upper) { + if (std::numeric_limits::lowest() >= bound_lower && +
[GitHub] [arrow] wesm commented on a change in pull request #7506: ARROW-9197: [C++] Overhaul integer/floating point casting: vectorize truncation checks, reduce binary size
wesm commented on a change in pull request #7506: URL: https://github.com/apache/arrow/pull/7506#discussion_r443244978 ## File path: cpp/src/arrow/compute/kernels/codegen_internal.h ## @@ -215,27 +215,23 @@ template struct BoxScalar> { using T = typename GetOutputType::T; using ScalarType = typename TypeTraits::ScalarType; - static std::shared_ptr Box(T val, const std::shared_ptr& type) { -return std::make_shared(val, type); - } + static void Box(T val, Scalar* out) { checked_cast(out)->value = val; } }; template struct BoxScalar> { using T = typename GetOutputType::T; using ScalarType = typename TypeTraits::ScalarType; - static std::shared_ptr Box(T val, const std::shared_ptr&) { -return std::make_shared(val); + static void Box(T val, Scalar* out) { +checked_cast(out)->value = std::make_shared(val); } }; template <> struct BoxScalar { using T = Decimal128; using ScalarType = Decimal128Scalar; - static std::shared_ptr Box(T val, const std::shared_ptr& type) { -return std::make_shared(val, type); - } + static void Box(T val, Scalar* out) { checked_cast(out)->value = val; } Review comment: The prior implementation was causing a lot of compiled code to be generated for some reason, FYI This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org