edponce commented on a change in pull request #11882:
URL: https://github.com/apache/arrow/pull/11882#discussion_r776811241
##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -316,6 +316,18 @@ struct ARROW_EXPORT CompareOptions {
enum CompareOperator op;
};
+class ARROW_EXPORT BetweenOptions : public FunctionOptions {
+ public:
+ enum Inclusiveness { BOTH, LEFT, RIGHT, NEITHER };
Review comment:
Note: We will need to move this enum to global space as a scoped enum
for `not_between` so that they both use the same options. This can be done in
other PR.
##########
File path: cpp/src/arrow/compute/api_scalar.cc
##########
@@ -99,6 +99,30 @@ struct EnumTraits<compute::CompareOperator>
return "<INVALID>";
}
};
+
+template <>
+struct EnumTraits<compute::BetweenOptions::Inclusiveness>
+ : BasicEnumTraits<compute::BetweenOptions::Inclusiveness,
+ compute::BetweenOptions::Inclusiveness::BOTH,
+ compute::BetweenOptions::Inclusiveness::LEFT,
+ compute::BetweenOptions::Inclusiveness::RIGHT,
+ compute::BetweenOptions::Inclusiveness::NEITHER> {
+ static std::string name() { return "BetweenOptions::Inclusiveness"; }
+ static std::string value_name(compute::BetweenOptions::Inclusiveness value) {
+ switch (value) {
+ case compute::BetweenOptions::Inclusiveness::BOTH:
+ return "BOTH";
+ case compute::BetweenOptions::Inclusiveness::LEFT:
+ return "LEFT";
+ case compute::BetweenOptions::Inclusiveness::RIGHT:
+ return "RIGHT";
+ case compute::BetweenOptions::Inclusiveness::NEITHER:
Review comment:
`Inclusiveness` --> `Inclusive` so that it matches Pandas and is less
verbose.
##########
File path: cpp/src/arrow/util/bit_block_counter.h
##########
@@ -424,6 +565,141 @@ class ARROW_EXPORT OptionalBinaryBitBlockCounter {
}
};
+/// \brief A class that computes popcounts on the result of bitwise operations
+/// between three bitmaps, 64 bits at a time. A 64-bit word is loaded from each
+/// bitmap, then the popcount is computed on e.g. the bitwise-and of the three
+/// words.
+class ARROW_EXPORT TernaryBitBlockCounter {
+ public:
+ TernaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
+ const uint8_t* mid_bitmap, int64_t mid_offset,
+ const uint8_t* right_bitmap, int64_t right_offset,
+ int64_t length)
+ : left_bitmap_(util::MakeNonNull(left_bitmap) + left_offset / 8),
+ left_offset_(left_offset % 8),
+ mid_bitmap_(util::MakeNonNull(mid_bitmap) + mid_offset / 8),
+ mid_offset_(mid_offset % 8),
+ right_bitmap_(util::MakeNonNull(right_bitmap) + right_offset / 8),
+ right_offset_(right_offset % 8),
+ bits_remaining_(length) {}
+
+ /// \brief Return the popcount of the bitwise-and of the next run of
+ /// available bits, up to 64. The returned pair contains the size of run and
+ /// the number of true values. The last block will have a length less than 64
+ /// if the bitmap length is not a multiple of 64, and will return 0-length
+ /// blocks in subsequent invocations.
+ BitBlockCount NextAndAndWord() { return NextWord<detail::BitBlockAndAnd>(); }
Review comment:
The only version used is `NextAndAndWord`, so you can remove the other
ones.
##########
File path: cpp/src/arrow/compute/kernels/scalar_compare.cc
##########
@@ -746,6 +787,155 @@ std::shared_ptr<ScalarFunction>
MakeScalarMinMax(std::string name,
return func;
}
+template <typename Op>
+void AddIntegerBetween(const std::shared_ptr<DataType>& ty, ScalarFunction*
func) {
+ auto exec = GeneratePhysicalInteger<ScalarTernaryEqualTypes, BooleanType,
Op>(*ty);
+ DCHECK_OK(func->AddKernel({ty, ty, ty}, boolean(), std::move(exec)));
+}
+
+template <typename InType, typename Op>
+void AddGenericBetween(const std::shared_ptr<DataType>& ty, ScalarFunction*
func) {
+ DCHECK_OK(func->AddKernel({ty, ty, ty}, boolean(),
+ ScalarTernaryEqualTypes<BooleanType, InType,
Op>::Exec));
+}
+
+template <typename OutType, typename ArgType, typename Op>
+struct BetweenTimestamps : public ScalarTernaryEqualTypes<OutType, ArgType,
Op> {
+ using Base = ScalarTernaryEqualTypes<OutType, ArgType, Op>;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const auto& var = checked_cast<const TimestampType&>(*batch[0].type());
+ const auto& lhs = checked_cast<const TimestampType&>(*batch[1].type());
+ const auto& rhs = checked_cast<const TimestampType&>(*batch[2].type());
+ if ((var.timezone().empty() != lhs.timezone().empty()) ||
+ (var.timezone().empty() != rhs.timezone().empty()) ||
+ (lhs.timezone().empty() != rhs.timezone().empty())) {
+ return Status::Invalid("Cannot compare timestamps with and without
timezones.");
+ }
+ return Base::Exec(ctx, batch, out);
+ }
+};
+
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeBetweenFunction(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared<CompareFunction>(name, Arity::Ternary(), doc);
+
+ DCHECK_OK(func->AddKernel(
+ {boolean(), boolean(), boolean()}, boolean(),
+ ScalarTernary<BooleanType, BooleanType, BooleanType, BooleanType,
Op>::Exec));
+
+ for (const std::shared_ptr<DataType>& ty : IntTypes()) {
+ AddIntegerBetween<Op>(ty, func.get());
+ }
+
+ AddIntegerBetween<Op>(date32(), func.get());
+ AddIntegerBetween<Op>(date64(), func.get());
+
+ AddGenericBetween<FloatType, Op>(float32(), func.get());
+ AddGenericBetween<DoubleType, Op>(float64(), func.get());
+
+ // Add timestamp kernels
+ for (auto unit : TimeUnit::values()) {
+ InputType in_type(match::TimestampTypeUnit(unit));
+ DCHECK_OK(func->AddKernel({in_type, in_type, in_type}, boolean(),
+ BetweenTimestamps<BooleanType, TimestampType,
Op>::Exec));
+ }
+
+ // Duration
+ for (auto unit : TimeUnit::values()) {
+ InputType in_type(match::DurationTypeUnit(unit));
+ auto exec =
+ GeneratePhysicalInteger<ScalarTernaryEqualTypes, BooleanType,
Op>(int64());
+ DCHECK_OK(func->AddKernel({in_type, in_type, in_type}, boolean(),
std::move(exec)));
+ }
+
+ // Time32 and Time64
+ for (auto unit : {TimeUnit::SECOND, TimeUnit::MILLI}) {
+ InputType in_type(match::Time32TypeUnit(unit));
+ auto exec =
+ GeneratePhysicalInteger<ScalarTernaryEqualTypes, BooleanType,
Op>(int32());
+ DCHECK_OK(func->AddKernel({in_type, in_type, in_type}, boolean(),
std::move(exec)));
+ }
+ for (auto unit : {TimeUnit::MICRO, TimeUnit::NANO}) {
+ InputType in_type(match::Time64TypeUnit(unit));
+ auto exec =
+ GeneratePhysicalInteger<ScalarTernaryEqualTypes, BooleanType,
Op>(int64());
+ DCHECK_OK(func->AddKernel({in_type, in_type, in_type}, boolean(),
std::move(exec)));
+ }
+
+ for (const std::shared_ptr<DataType>& ty : BaseBinaryTypes()) {
+ auto exec = GenerateVarBinaryBase<ScalarTernaryEqualTypes, BooleanType,
Op>(*ty);
+ DCHECK_OK(func->AddKernel(
+ {InputType::Array(ty), InputType::Scalar(ty), InputType::Scalar(ty)},
boolean(),
+ exec));
+ DCHECK_OK(func->AddKernel(
+ {InputType::Array(ty), InputType::Array(ty), InputType::Array(ty)},
boolean(),
+ std::move(exec)));
+ }
+
+ for (const auto id : {Type::DECIMAL128, Type::DECIMAL256}) {
+ auto exec = GenerateDecimal<ScalarTernaryEqualTypes, BooleanType, Op>(id);
+ DCHECK_OK(func->AddKernel({InputType(id), InputType(id), InputType(id)},
boolean(),
+ std::move(exec)));
+ }
+
+ {
+ auto exec = ScalarTernaryEqualTypes<BooleanType, FixedSizeBinaryType,
Op>::Exec;
+ auto ty = InputType(Type::FIXED_SIZE_BINARY);
+ DCHECK_OK(func->AddKernel({ty, ty, ty}, boolean(), std::move(exec)));
+ }
+
+ return func;
+}
+
+const BetweenOptions* GetDefaultBetweenOptions() {
+ static const auto kBetweenOptions = BetweenOptions::Defaults();
+ return &kBetweenOptions;
+}
+
+const FunctionDoc between_doc{"Check if values are in a range, val betwen a
and b",
+ ("A null on either side emits a null comparison
result.\n"
+ "options are used to specify if the endpoints
are\n"
+ "inclusive, possible values are NEITHER
(a<val<b),\n"
+ "LEFT (a<=val<b), RIGHT (a<val<=b), and the
default\n"
+ "if not specified BOTH (a<=val<=b)"),
+ {"val", "a", "b"}};
+
+class BetweenMetaFunction : public MetaFunction {
+ public:
+ BetweenMetaFunction()
+ : MetaFunction("between", Arity::Ternary(), &between_doc,
+ GetDefaultBetweenOptions()) {}
+
+ Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
+ const FunctionOptions* options,
+ ExecContext* ctx) const override {
+ const BetweenOptions& between_options = static_cast<const
BetweenOptions&>(*options);
+ Datum result;
Review comment:
I disagree with this approach. It defeats the purpose of having the
`BetweenOptions` because they are being consumed by `CallFunction`.
I will take a stab at supporting internal dispatching based on
`BetweenOptions` and submit a PR to your branch. Up to now this is looking good
but needs a bit of tweaking.
##########
File path: cpp/src/arrow/util/bit_block_counter.h
##########
@@ -87,6 +87,147 @@ struct BitBlockOrNot<bool> {
static bool Call(bool left, bool right) { return left || !right; }
};
+// Three Arguments
+template <typename T>
+struct BitBlockAndAnd {
+ static T Call(T left, T mid, T right) { return left & mid & right; }
+};
+
+template <>
+struct BitBlockAndAnd<bool> {
+ static bool Call(bool left, bool mid, bool right) { return left && mid &&
right; }
Review comment:
Actually, the only combination used in this PR is `BitBlockAndAnd`, so
you can remove all the other ones. There is no reason to have dead code. Also,
ARROW-15220 got merged so you can see the new form for these operators. Note:
It will change [their use as template
parameters](https://github.com/apache/arrow/blob/master/cpp/src/arrow/util/bit_block_counter.h#L287-L288)
and how [`Call` is
invoked](https://github.com/apache/arrow/blob/master/cpp/src/arrow/util/bit_block_counter.h#L322).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]