[GitHub] [arrow] wesm commented on a change in pull request #7506: ARROW-9197: [C++] Overhaul integer/floating point casting: vectorize truncation checks, reduce binary size

2020-06-22 Thread GitBox


wesm commented on a change in pull request #7506:
URL: https://github.com/apache/arrow/pull/7506#discussion_r443696884



##
File path: cpp/src/arrow/util/int_util.cc
##
@@ -461,75 +472,434 @@ Status IndexBoundsCheckImpl(const ArrayData& indices, 
uint64_t upper_limit) {
   if (indices.buffers[0]) {
 bitmap = indices.buffers[0]->data();
   }
-  auto IsOutOfBounds = [&](int64_t i) -> bool {
-return (
-(IsSigned && indices_data[i] < 0) ||
-(indices_data[i] >= 0 && static_cast(indices_data[i]) >= 
upper_limit));
+  auto IsOutOfBounds = [&](IndexCType val) -> bool {
+return ((IsSigned && val < 0) ||
+(val >= 0 && static_cast(val) >= upper_limit));
+  };
+  auto IsOutOfBoundsMaybeNull = [&](IndexCType val, bool is_valid) -> bool {
+return is_valid && ((IsSigned && val < 0) ||
+(val >= 0 && static_cast(val) >= 
upper_limit));
   };
   OptionalBitBlockCounter indices_bit_counter(bitmap, indices.offset, 
indices.length);
   int64_t position = 0;
+  int64_t offset_position = indices.offset;
   while (position < indices.length) {
 BitBlockCount block = indices_bit_counter.NextBlock();
 bool block_out_of_bounds = false;
 if (block.popcount == block.length) {
   // Fast path: branchless
   for (int64_t i = 0; i < block.length; ++i) {
-block_out_of_bounds |= IsOutOfBounds(i);
+block_out_of_bounds |= IsOutOfBounds(indices_data[i]);
   }
 } else if (block.popcount > 0) {
   // Indices have nulls, must only boundscheck non-null values
-  for (int64_t i = 0; i < block.length; ++i) {
-if (BitUtil::GetBit(bitmap, indices.offset + position + i)) {
-  block_out_of_bounds |= IsOutOfBounds(i);
+  int64_t i = 0;
+  for (int64_t chunk = 0; chunk < block.length / 8; ++chunk) {
+// Let the compiler unroll this
+for (int j = 0; j < 8; ++j) {
+  block_out_of_bounds |= IsOutOfBoundsMaybeNull(
+  indices_data[i], BitUtil::GetBit(bitmap, offset_position + i));
+  ++i;
 }
   }
+  for (; i < block.length; ++i) {
+block_out_of_bounds |= IsOutOfBoundsMaybeNull(
+indices_data[i], BitUtil::GetBit(bitmap, offset_position + i));
+  }
 }
 if (ARROW_PREDICT_FALSE(block_out_of_bounds)) {
   if (indices.GetNullCount() > 0) {
 for (int64_t i = 0; i < block.length; ++i) {
-  if (BitUtil::GetBit(bitmap, indices.offset + position + i)) {
-if (IsOutOfBounds(i)) {
-  return Status::IndexError("Index ", 
static_cast(indices_data[i]),
-" out of bounds");
-}
+  if (IsOutOfBoundsMaybeNull(indices_data[i],
+ BitUtil::GetBit(bitmap, offset_position + 
i))) {
+return Status::IndexError("Index ", FormatInt(indices_data[i]),
+  " out of bounds");
   }
 }
   } else {
 for (int64_t i = 0; i < block.length; ++i) {
-  if (IsOutOfBounds(i)) {
-return Status::IndexError("Index ", 
static_cast(indices_data[i]),
+  if (IsOutOfBounds(indices_data[i])) {
+return Status::IndexError("Index ", FormatInt(indices_data[i]),
   " out of bounds");
   }
 }
   }
 }
 indices_data += block.length;
 position += block.length;
+offset_position += block.length;
   }
   return Status::OK();
 }
 
 /// \brief Branchless boundschecking of the indices. Processes batches of
 /// indices at a time and shortcircuits when encountering an out-of-bounds
 /// index in a batch
-Status IndexBoundsCheck(const ArrayData& indices, uint64_t upper_limit) {
+Status CheckIndexBounds(const ArrayData& indices, uint64_t upper_limit) {
   switch (indices.type->id()) {
 case Type::INT8:
-  return IndexBoundsCheckImpl(indices, upper_limit);
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::INT16:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::INT32:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::INT64:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::UINT8:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::UINT16:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::UINT32:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::UINT64:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+default:
+  return Status::Invalid("Invalid index type for boundschecking");
+  }
+}
+
+// --
+// Utilities for casting from one integer type to another
+
+template 
+Status IntegersInRange(const Datum& datum, CType bound_lower, CType 
bound_upper) {
+  if (std::numeric_limits::lowest() >= bound_lower &&
+

[GitHub] [arrow] wesm commented on a change in pull request #7506: ARROW-9197: [C++] Overhaul integer/floating point casting: vectorize truncation checks, reduce binary size

2020-06-22 Thread GitBox


wesm commented on a change in pull request #7506:
URL: https://github.com/apache/arrow/pull/7506#discussion_r443689349



##
File path: cpp/src/arrow/util/int_util.cc
##
@@ -461,75 +472,434 @@ Status IndexBoundsCheckImpl(const ArrayData& indices, 
uint64_t upper_limit) {
   if (indices.buffers[0]) {
 bitmap = indices.buffers[0]->data();
   }
-  auto IsOutOfBounds = [&](int64_t i) -> bool {
-return (
-(IsSigned && indices_data[i] < 0) ||
-(indices_data[i] >= 0 && static_cast(indices_data[i]) >= 
upper_limit));
+  auto IsOutOfBounds = [&](IndexCType val) -> bool {
+return ((IsSigned && val < 0) ||
+(val >= 0 && static_cast(val) >= upper_limit));
+  };
+  auto IsOutOfBoundsMaybeNull = [&](IndexCType val, bool is_valid) -> bool {
+return is_valid && ((IsSigned && val < 0) ||
+(val >= 0 && static_cast(val) >= 
upper_limit));
   };
   OptionalBitBlockCounter indices_bit_counter(bitmap, indices.offset, 
indices.length);
   int64_t position = 0;
+  int64_t offset_position = indices.offset;
   while (position < indices.length) {
 BitBlockCount block = indices_bit_counter.NextBlock();
 bool block_out_of_bounds = false;
 if (block.popcount == block.length) {
   // Fast path: branchless
   for (int64_t i = 0; i < block.length; ++i) {
-block_out_of_bounds |= IsOutOfBounds(i);
+block_out_of_bounds |= IsOutOfBounds(indices_data[i]);
   }
 } else if (block.popcount > 0) {
   // Indices have nulls, must only boundscheck non-null values
-  for (int64_t i = 0; i < block.length; ++i) {
-if (BitUtil::GetBit(bitmap, indices.offset + position + i)) {
-  block_out_of_bounds |= IsOutOfBounds(i);
+  int64_t i = 0;
+  for (int64_t chunk = 0; chunk < block.length / 8; ++chunk) {
+// Let the compiler unroll this
+for (int j = 0; j < 8; ++j) {
+  block_out_of_bounds |= IsOutOfBoundsMaybeNull(
+  indices_data[i], BitUtil::GetBit(bitmap, offset_position + i));
+  ++i;
 }
   }
+  for (; i < block.length; ++i) {
+block_out_of_bounds |= IsOutOfBoundsMaybeNull(
+indices_data[i], BitUtil::GetBit(bitmap, offset_position + i));
+  }
 }
 if (ARROW_PREDICT_FALSE(block_out_of_bounds)) {
   if (indices.GetNullCount() > 0) {
 for (int64_t i = 0; i < block.length; ++i) {
-  if (BitUtil::GetBit(bitmap, indices.offset + position + i)) {
-if (IsOutOfBounds(i)) {
-  return Status::IndexError("Index ", 
static_cast(indices_data[i]),
-" out of bounds");
-}
+  if (IsOutOfBoundsMaybeNull(indices_data[i],
+ BitUtil::GetBit(bitmap, offset_position + 
i))) {
+return Status::IndexError("Index ", FormatInt(indices_data[i]),
+  " out of bounds");
   }
 }
   } else {
 for (int64_t i = 0; i < block.length; ++i) {
-  if (IsOutOfBounds(i)) {
-return Status::IndexError("Index ", 
static_cast(indices_data[i]),
+  if (IsOutOfBounds(indices_data[i])) {
+return Status::IndexError("Index ", FormatInt(indices_data[i]),
   " out of bounds");
   }
 }
   }
 }
 indices_data += block.length;
 position += block.length;
+offset_position += block.length;
   }
   return Status::OK();
 }
 
 /// \brief Branchless boundschecking of the indices. Processes batches of
 /// indices at a time and shortcircuits when encountering an out-of-bounds
 /// index in a batch
-Status IndexBoundsCheck(const ArrayData& indices, uint64_t upper_limit) {
+Status CheckIndexBounds(const ArrayData& indices, uint64_t upper_limit) {
   switch (indices.type->id()) {
 case Type::INT8:
-  return IndexBoundsCheckImpl(indices, upper_limit);
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::INT16:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::INT32:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::INT64:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::UINT8:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::UINT16:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::UINT32:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+case Type::UINT64:
+  return CheckIndexBoundsImpl(indices, upper_limit);
+default:
+  return Status::Invalid("Invalid index type for boundschecking");
+  }
+}
+
+// --
+// Utilities for casting from one integer type to another
+
+template 
+Status IntegersInRange(const Datum& datum, CType bound_lower, CType 
bound_upper) {
+  if (std::numeric_limits::lowest() >= bound_lower &&
+

[GitHub] [arrow] wesm commented on a change in pull request #7506: ARROW-9197: [C++] Overhaul integer/floating point casting: vectorize truncation checks, reduce binary size

2020-06-21 Thread GitBox


wesm commented on a change in pull request #7506:
URL: https://github.com/apache/arrow/pull/7506#discussion_r443244978



##
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##
@@ -215,27 +215,23 @@ template 
 struct BoxScalar> {
   using T = typename GetOutputType::T;
   using ScalarType = typename TypeTraits::ScalarType;
-  static std::shared_ptr Box(T val, const std::shared_ptr& 
type) {
-return std::make_shared(val, type);
-  }
+  static void Box(T val, Scalar* out) { checked_cast(out)->value 
= val; }
 };
 
 template 
 struct BoxScalar> {
   using T = typename GetOutputType::T;
   using ScalarType = typename TypeTraits::ScalarType;
-  static std::shared_ptr Box(T val, const std::shared_ptr&) {
-return std::make_shared(val);
+  static void Box(T val, Scalar* out) {
+checked_cast(out)->value = std::make_shared(val);
   }
 };
 
 template <>
 struct BoxScalar {
   using T = Decimal128;
   using ScalarType = Decimal128Scalar;
-  static std::shared_ptr Box(T val, const std::shared_ptr& 
type) {
-return std::make_shared(val, type);
-  }
+  static void Box(T val, Scalar* out) { checked_cast(out)->value 
= val; }

Review comment:
   The prior implementation was causing a lot of compiled code to be 
generated for some reason, FYI





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org