kou commented on a change in pull request #11882: URL: https://github.com/apache/arrow/pull/11882#discussion_r767413145
########## File path: cpp/src/arrow/compute/kernels/scalar_compare_test.cc ########## @@ -1850,5 +1851,154 @@ TEST(TestMaxElementWiseMinElementWise, CommonTemporal) { ResultWith(ScalarFromJSON(date64(), "86400000"))); } +template <typename ArrowType> +static void ValidateBetween(const Datum& val, const Datum& lhs, const Datum& rhs, + const Datum& expected) { + ASSERT_OK_AND_ASSIGN(Datum result, Between(val, lhs, rhs)); + AssertArraysEqual(*expected.make_array(), *result.make_array(), + /*verbose=*/true); +} + +template <typename ArrowType> +static void ValidateBetween(const char* value_str, const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<ArrowType>(value, lhs, rhs, expected); +} + +template <> +void ValidateBetween<StringType>(const char* value_str, const Datum& lhs, + const Datum& rhs, const char* expected_str) { + auto value = ArrayFromJSON(utf8(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<StringType>(value, lhs, rhs, expected); +} + +template <typename ArrowType> +class TestNumericBetweenKernel : public ::testing::Test {}; + +TYPED_TEST_SUITE(TestNumericBetweenKernel, NumericArrowTypes); +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar) { + using ScalarType = typename TypeTraits<TypeParam>::ScalarType; + using CType = typename TypeTraits<TypeParam>::CType; + + Datum zero(std::make_shared<ScalarType>(CType(0))); + Datum four(std::make_shared<ScalarType>(CType(4))); + ValidateBetween<TypeParam>("[]", zero, four, "[]"); + ValidateBetween<TypeParam>("[null]", zero, four, "[null]"); + ValidateBetween<TypeParam>("[0,0,1,1,2,2]", zero, four, "[0,0,1,1,1,1]"); + ValidateBetween<TypeParam>("[0,1,2,3,4,5]", zero, four, "[0,1,1,1,0,0]"); + ValidateBetween<TypeParam>("[5,4,3,2,1,0]", zero, four, "[0,0,1,1,1,0]"); + ValidateBetween<TypeParam>("[null,0,1,1]", zero, four, "[null,0,1,1]"); Review comment: Should we also add tests for `lhs` is NULL and/or `rhs` is NULL cases? ########## File path: cpp/src/arrow/compute/kernels/scalar_compare.cc ########## @@ -746,6 +768,76 @@ std::shared_ptr<ScalarFunction> MakeScalarMinMax(std::string name, return func; } +template <typename Op> +void AddIntegerBetween(const std::shared_ptr<DataType>& ty, ScalarFunction* func) { + auto exec = GeneratePhysicalInteger<ScalarTernaryEqualTypes, BooleanType, Op>(*ty); + DCHECK_OK(func->AddKernel({ty, ty, ty}, boolean(), std::move(exec))); +} + +template <typename InType, typename Op> +void AddGenericBetween(const std::shared_ptr<DataType>& ty, ScalarFunction* func) { + DCHECK_OK(func->AddKernel({ty, ty, ty}, boolean(), + ScalarTernaryEqualTypes<BooleanType, InType, Op>::Exec)); +} + +template <typename Op> +std::shared_ptr<ScalarFunction> MakeBetweenFunction(std::string name, + const FunctionDoc* doc) { + auto func = std::make_shared<CompareFunction>(name, Arity::Ternary(), doc); + + for (const std::shared_ptr<DataType>& ty : IntTypes()) { + AddIntegerBetween<Op>(ty, func.get()); + } + + AddIntegerBetween<Op>(date32(), func.get()); + AddIntegerBetween<Op>(date64(), func.get()); + + AddGenericBetween<FloatType, Op>(float32(), func.get()); + AddGenericBetween<DoubleType, Op>(float64(), func.get()); + + // Add timestamp kernels + for (auto unit : TimeUnit::values()) { + InputType in_type(match::TimestampTypeUnit(unit)); + auto exec = + GeneratePhysicalInteger<ScalarTernaryEqualTypes, BooleanType, Op>(int64()); + DCHECK_OK(func->AddKernel({in_type, in_type, in_type}, boolean(), std::move(exec))); Review comment: It seems that we need to care time zone here like #11541. ########## File path: cpp/src/arrow/compute/kernels/scalar_compare.cc ########## @@ -68,6 +72,32 @@ struct GreaterEqual { } }; +struct Between { + template <typename T, typename Arg0, typename Arg1, typename Arg2> + static constexpr T Call(KernelContext*, const Arg0& middle, const Arg1& left, + const Arg2& right, Status*) { + static_assert(std::is_same<T, bool>::value && std::is_same<Arg0, Arg1>::value && + std::is_same<Arg1, Arg2>::value, + ""); + return (left < middle) && (middle < right); + } +}; + +template <typename T> +using is_unsigned_integer = std::integral_constant<bool, std::is_integral<T>::value && + std::is_unsigned<T>::value>; Review comment: Did you copy this from `codegen_internal.h`? Can we reuse existing declaration? ########## File path: cpp/src/arrow/compute/kernels/scalar_compare_test.cc ########## @@ -1850,5 +1851,154 @@ TEST(TestMaxElementWiseMinElementWise, CommonTemporal) { ResultWith(ScalarFromJSON(date64(), "86400000"))); } +template <typename ArrowType> +static void ValidateBetween(const Datum& val, const Datum& lhs, const Datum& rhs, + const Datum& expected) { + ASSERT_OK_AND_ASSIGN(Datum result, Between(val, lhs, rhs)); + AssertArraysEqual(*expected.make_array(), *result.make_array(), + /*verbose=*/true); +} + +template <typename ArrowType> +static void ValidateBetween(const char* value_str, const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<ArrowType>(value, lhs, rhs, expected); +} + +template <> +void ValidateBetween<StringType>(const char* value_str, const Datum& lhs, + const Datum& rhs, const char* expected_str) { + auto value = ArrayFromJSON(utf8(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<StringType>(value, lhs, rhs, expected); +} + +template <typename ArrowType> +class TestNumericBetweenKernel : public ::testing::Test {}; + +TYPED_TEST_SUITE(TestNumericBetweenKernel, NumericArrowTypes); +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar) { Review comment: Can we also add `RandomBetweenArrayScalarScalar` like `RandomComapreArrayScalar`? ########## File path: cpp/src/arrow/compute/kernels/scalar_compare.cc ########## @@ -786,6 +878,10 @@ const FunctionDoc max_element_wise_doc{ {"*args"}, "ElementWiseAggregateOptions"}; +const FunctionDoc between_doc{"Check if values are in a range x <= y <= z", Review comment: Is this correct? ```suggestion const FunctionDoc between_doc{"Check if values are in a range x < y < z", ``` ########## File path: cpp/src/arrow/compute/kernels/scalar_compare_test.cc ########## @@ -1850,5 +1851,154 @@ TEST(TestMaxElementWiseMinElementWise, CommonTemporal) { ResultWith(ScalarFromJSON(date64(), "86400000"))); } +template <typename ArrowType> +static void ValidateBetween(const Datum& val, const Datum& lhs, const Datum& rhs, + const Datum& expected) { + ASSERT_OK_AND_ASSIGN(Datum result, Between(val, lhs, rhs)); + AssertArraysEqual(*expected.make_array(), *result.make_array(), + /*verbose=*/true); +} + +template <typename ArrowType> +static void ValidateBetween(const char* value_str, const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<ArrowType>(value, lhs, rhs, expected); +} + +template <> +void ValidateBetween<StringType>(const char* value_str, const Datum& lhs, + const Datum& rhs, const char* expected_str) { + auto value = ArrayFromJSON(utf8(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<StringType>(value, lhs, rhs, expected); +} + +template <typename ArrowType> +class TestNumericBetweenKernel : public ::testing::Test {}; + +TYPED_TEST_SUITE(TestNumericBetweenKernel, NumericArrowTypes); +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar) { + using ScalarType = typename TypeTraits<TypeParam>::ScalarType; + using CType = typename TypeTraits<TypeParam>::CType; + + Datum zero(std::make_shared<ScalarType>(CType(0))); + Datum four(std::make_shared<ScalarType>(CType(4))); + ValidateBetween<TypeParam>("[]", zero, four, "[]"); + ValidateBetween<TypeParam>("[null]", zero, four, "[null]"); + ValidateBetween<TypeParam>("[0,0,1,1,2,2]", zero, four, "[0,0,1,1,1,1]"); + ValidateBetween<TypeParam>("[0,1,2,3,4,5]", zero, four, "[0,1,1,1,0,0]"); + ValidateBetween<TypeParam>("[5,4,3,2,1,0]", zero, four, "[0,0,1,1,1,0]"); + ValidateBetween<TypeParam>("[null,0,1,1]", zero, four, "[null,0,1,1]"); +} + +TEST(TestSimpleBetweenKernel, SimpleStringTest) { + using ScalarType = typename TypeTraits<StringType>::ScalarType; + auto l = Datum(std::make_shared<ScalarType>("abc")); + auto r = Datum(std::make_shared<ScalarType>("zzz")); + ValidateBetween<StringType>("[]", l, r, "[]"); + ValidateBetween<StringType>("[null]", l, r, "[null]"); + ValidateBetween<StringType>(R"(["aaa", "aaaa", "ccc", "z"])", l, r, + R"([false, false, true, true])"); + ValidateBetween<StringType>(R"(["a", "aaaa", "c", "z"])", l, r, + R"([false, false, true, true])"); + ValidateBetween<StringType>(R"(["a", "aaaa", "fff", "zzzz"])", l, r, + R"([false, false, true, false])"); + ValidateBetween<StringType>(R"(["abd", null, null, "zzx"])", l, r, + R"([true, null, null, true])"); +} + +TEST(TestSimpleBetweenKernel, SimpleTimestampTest) { + using ScalarType = typename TypeTraits<TimestampType>::ScalarType; + auto checkTimestampArray = [](std::shared_ptr<DataType> type, const char* input_str, + const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(type, input_str); + auto expected = ArrayFromJSON(boolean(), expected_str); + ValidateBetween<TimestampType>(value, lhs, rhs, expected); + }; + auto unit = TimeUnit::SECOND; + auto l = Datum(std::make_shared<ScalarType>(923184000, timestamp(unit))); + auto r = Datum(std::make_shared<ScalarType>(1602032602, timestamp(unit))); + checkTimestampArray(timestamp(unit), "[]", l, r, "[]"); + checkTimestampArray(timestamp(unit), "[null]", l, r, "[null]"); + checkTimestampArray(timestamp(unit), R"(["1970-01-01","2000-02-29","1900-02-28"])", l, + r, "[false,true,false]"); + checkTimestampArray(timestamp(unit), R"(["1970-01-01","2000-02-29","2004-02-28"])", l, + r, "[false,true,true]"); + checkTimestampArray(timestamp(unit), R"(["2018-01-01","1999-04-04","1900-02-28"])", l, + r, "[true,false,false]"); +} + +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayArrayArray) { + ValidateBetween<TypeParam>( + "[]", ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[]"), "[]"); + ValidateBetween<TypeParam>( + "[null]", ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[null]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[null]"), "[null]"); + ValidateBetween<TypeParam>( + "[1,1,2,2,2]", + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[0,0,1,3,3]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[10,10,2,5,5]"), + "[true,true,false,false,false]"); + ValidateBetween<TypeParam>( + "[1,1,2,2,2,2]", + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[0,0,1,null,3,3]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[10,10,2,2,5,5]"), + "[true,true,false,null,false,false]"); +} + +TEST(TestSimpleBetweenKernel, StringArrayArrayArrayTest) { + ValidateBetween<StringType>( + R"(["david","hello","world"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["adam","hi","whirl"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["robert","goeiemoreen","whirlwind"])"), + "[true, false, false]"); + ValidateBetween<StringType>( + R"(["x","a","f"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["w","a","e"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["z","a","g"])"), + "[true, false, true]"); + ValidateBetween<StringType>( + R"(["block","bit","binary"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["bit","nibble","ternary"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["word","d","xyz"])"), + "[true, false, false]"); + ValidateBetween<StringType>(R"(["Ayumi","アユミ","王梦莹"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["たなか","あゆみ","歩美"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["李平之","田中","たなか"])"), + "[false, true, false]"); Review comment: Do we need this case? ########## File path: cpp/src/arrow/util/bit_block_counter.h ########## @@ -586,5 +961,144 @@ static void VisitTwoBitBlocksVoid(const std::shared_ptr<Buffer>& left_bitmap_buf } } +template <typename VisitNotNull, typename VisitNull> +static void VisitThreeBitBlocksVoid( + const std::shared_ptr<Buffer>& left_bitmap_buf, int64_t left_offset, + const std::shared_ptr<Buffer>& mid_bitmap_buf, int64_t mid_offset, + const std::shared_ptr<Buffer>& right_bitmap_buf, int64_t right_offset, int64_t length, + VisitNotNull&& visit_not_null, VisitNull&& visit_null) { + if (((left_bitmap_buf == NULLPTR) && (mid_bitmap_buf == NULLPTR)) || + ((mid_bitmap_buf == NULLPTR) && (right_bitmap_buf == NULLPTR)) || + ((left_bitmap_buf == NULLPTR) && (right_bitmap_buf == NULLPTR))) { + // At most one bitmap is present + if ((left_bitmap_buf == NULLPTR) && (mid_bitmap_buf == NULLPTR)) { + return VisitBitBlocksVoid(right_bitmap_buf, right_offset, length, + std::forward<VisitNotNull>(visit_not_null), + std::forward<VisitNull>(visit_null)); + } else if ((mid_bitmap_buf == NULLPTR) && (right_bitmap_buf == NULLPTR)) { + return VisitBitBlocksVoid(left_bitmap_buf, left_offset, length, + std::forward<VisitNotNull>(visit_not_null), + std::forward<VisitNull>(visit_null)); + } else { + return VisitBitBlocksVoid(mid_bitmap_buf, mid_offset, length, + std::forward<VisitNotNull>(visit_not_null), + std::forward<VisitNull>(visit_null)); + } + } + // Two bitmaps are present + if (left_bitmap_buf == NULLPTR) { + const uint8_t* mid_bitmap = mid_bitmap_buf->data(); Review comment: Can we use `VisitTwoBitBlocksVoid()` here? ########## File path: cpp/src/arrow/compute/kernels/scalar_compare_test.cc ########## @@ -1850,5 +1851,154 @@ TEST(TestMaxElementWiseMinElementWise, CommonTemporal) { ResultWith(ScalarFromJSON(date64(), "86400000"))); } +template <typename ArrowType> +static void ValidateBetween(const Datum& val, const Datum& lhs, const Datum& rhs, + const Datum& expected) { + ASSERT_OK_AND_ASSIGN(Datum result, Between(val, lhs, rhs)); + AssertArraysEqual(*expected.make_array(), *result.make_array(), + /*verbose=*/true); +} + +template <typename ArrowType> +static void ValidateBetween(const char* value_str, const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<ArrowType>(value, lhs, rhs, expected); +} + +template <> +void ValidateBetween<StringType>(const char* value_str, const Datum& lhs, + const Datum& rhs, const char* expected_str) { + auto value = ArrayFromJSON(utf8(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<StringType>(value, lhs, rhs, expected); +} + +template <typename ArrowType> +class TestNumericBetweenKernel : public ::testing::Test {}; + +TYPED_TEST_SUITE(TestNumericBetweenKernel, NumericArrowTypes); +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar) { + using ScalarType = typename TypeTraits<TypeParam>::ScalarType; + using CType = typename TypeTraits<TypeParam>::CType; + + Datum zero(std::make_shared<ScalarType>(CType(0))); + Datum four(std::make_shared<ScalarType>(CType(4))); + ValidateBetween<TypeParam>("[]", zero, four, "[]"); + ValidateBetween<TypeParam>("[null]", zero, four, "[null]"); + ValidateBetween<TypeParam>("[0,0,1,1,2,2]", zero, four, "[0,0,1,1,1,1]"); + ValidateBetween<TypeParam>("[0,1,2,3,4,5]", zero, four, "[0,1,1,1,0,0]"); + ValidateBetween<TypeParam>("[5,4,3,2,1,0]", zero, four, "[0,0,1,1,1,0]"); + ValidateBetween<TypeParam>("[null,0,1,1]", zero, four, "[null,0,1,1]"); +} + +TEST(TestSimpleBetweenKernel, SimpleStringTest) { + using ScalarType = typename TypeTraits<StringType>::ScalarType; + auto l = Datum(std::make_shared<ScalarType>("abc")); + auto r = Datum(std::make_shared<ScalarType>("zzz")); + ValidateBetween<StringType>("[]", l, r, "[]"); + ValidateBetween<StringType>("[null]", l, r, "[null]"); + ValidateBetween<StringType>(R"(["aaa", "aaaa", "ccc", "z"])", l, r, + R"([false, false, true, true])"); + ValidateBetween<StringType>(R"(["a", "aaaa", "c", "z"])", l, r, + R"([false, false, true, true])"); + ValidateBetween<StringType>(R"(["a", "aaaa", "fff", "zzzz"])", l, r, + R"([false, false, true, false])"); + ValidateBetween<StringType>(R"(["abd", null, null, "zzx"])", l, r, + R"([true, null, null, true])"); +} + +TEST(TestSimpleBetweenKernel, SimpleTimestampTest) { Review comment: Did you copy this from `TEST(TestCompareTimestamps, Basics)`? Do you want to add between kernel version of `TEST(TestCompareTimestamps, DifferentParameters)` too? ########## File path: cpp/src/arrow/compute/kernels/codegen_internal.h ########## @@ -583,7 +606,23 @@ struct OutputAdapter<Type, enable_if_base_binary<Type>> { return Status::NotImplemented("NYI"); } }; +/* +template <typename Type> +struct OutputAdapter<Type, enable_if_base_binary<Type>> { + using T = typename TypeTraits<Type>::ScalarType::ValueType; + template <typename Generator> + static Status Write(KernelContext*, Datum* out, Generator&& generator) { + ArrayData* out_arr = out->mutable_array(); + auto out_data = out_arr->GetMutableValues<T>(1); + // TODO: Is this as fast as a more explicitly inlined function? + for (int64_t i = 0; i < out_arr->length; ++i) { + *out_data++ = generator(); + } + return Status::OK(); + } +}; +*/ Review comment: Should we remove this? ########## File path: cpp/src/arrow/compute/kernels/scalar_compare_test.cc ########## @@ -1850,5 +1851,154 @@ TEST(TestMaxElementWiseMinElementWise, CommonTemporal) { ResultWith(ScalarFromJSON(date64(), "86400000"))); } +template <typename ArrowType> +static void ValidateBetween(const Datum& val, const Datum& lhs, const Datum& rhs, + const Datum& expected) { + ASSERT_OK_AND_ASSIGN(Datum result, Between(val, lhs, rhs)); + AssertArraysEqual(*expected.make_array(), *result.make_array(), + /*verbose=*/true); +} + +template <typename ArrowType> +static void ValidateBetween(const char* value_str, const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<ArrowType>(value, lhs, rhs, expected); +} + +template <> +void ValidateBetween<StringType>(const char* value_str, const Datum& lhs, + const Datum& rhs, const char* expected_str) { + auto value = ArrayFromJSON(utf8(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<StringType>(value, lhs, rhs, expected); +} + +template <typename ArrowType> +class TestNumericBetweenKernel : public ::testing::Test {}; + +TYPED_TEST_SUITE(TestNumericBetweenKernel, NumericArrowTypes); +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar) { + using ScalarType = typename TypeTraits<TypeParam>::ScalarType; + using CType = typename TypeTraits<TypeParam>::CType; + + Datum zero(std::make_shared<ScalarType>(CType(0))); + Datum four(std::make_shared<ScalarType>(CType(4))); + ValidateBetween<TypeParam>("[]", zero, four, "[]"); + ValidateBetween<TypeParam>("[null]", zero, four, "[null]"); + ValidateBetween<TypeParam>("[0,0,1,1,2,2]", zero, four, "[0,0,1,1,1,1]"); + ValidateBetween<TypeParam>("[0,1,2,3,4,5]", zero, four, "[0,1,1,1,0,0]"); + ValidateBetween<TypeParam>("[5,4,3,2,1,0]", zero, four, "[0,0,1,1,1,0]"); + ValidateBetween<TypeParam>("[null,0,1,1]", zero, four, "[null,0,1,1]"); +} + +TEST(TestSimpleBetweenKernel, SimpleStringTest) { Review comment: Do we need `Simple`s here? Do you want to add more tests? ########## File path: cpp/src/arrow/compute/kernels/scalar_compare_test.cc ########## @@ -1850,5 +1851,154 @@ TEST(TestMaxElementWiseMinElementWise, CommonTemporal) { ResultWith(ScalarFromJSON(date64(), "86400000"))); } +template <typename ArrowType> +static void ValidateBetween(const Datum& val, const Datum& lhs, const Datum& rhs, + const Datum& expected) { + ASSERT_OK_AND_ASSIGN(Datum result, Between(val, lhs, rhs)); + AssertArraysEqual(*expected.make_array(), *result.make_array(), + /*verbose=*/true); +} + +template <typename ArrowType> +static void ValidateBetween(const char* value_str, const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<ArrowType>(value, lhs, rhs, expected); +} + +template <> +void ValidateBetween<StringType>(const char* value_str, const Datum& lhs, + const Datum& rhs, const char* expected_str) { + auto value = ArrayFromJSON(utf8(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<StringType>(value, lhs, rhs, expected); +} + +template <typename ArrowType> +class TestNumericBetweenKernel : public ::testing::Test {}; + +TYPED_TEST_SUITE(TestNumericBetweenKernel, NumericArrowTypes); +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar) { + using ScalarType = typename TypeTraits<TypeParam>::ScalarType; + using CType = typename TypeTraits<TypeParam>::CType; + + Datum zero(std::make_shared<ScalarType>(CType(0))); + Datum four(std::make_shared<ScalarType>(CType(4))); + ValidateBetween<TypeParam>("[]", zero, four, "[]"); + ValidateBetween<TypeParam>("[null]", zero, four, "[null]"); + ValidateBetween<TypeParam>("[0,0,1,1,2,2]", zero, four, "[0,0,1,1,1,1]"); + ValidateBetween<TypeParam>("[0,1,2,3,4,5]", zero, four, "[0,1,1,1,0,0]"); + ValidateBetween<TypeParam>("[5,4,3,2,1,0]", zero, four, "[0,0,1,1,1,0]"); + ValidateBetween<TypeParam>("[null,0,1,1]", zero, four, "[null,0,1,1]"); +} + +TEST(TestSimpleBetweenKernel, SimpleStringTest) { + using ScalarType = typename TypeTraits<StringType>::ScalarType; + auto l = Datum(std::make_shared<ScalarType>("abc")); + auto r = Datum(std::make_shared<ScalarType>("zzz")); + ValidateBetween<StringType>("[]", l, r, "[]"); + ValidateBetween<StringType>("[null]", l, r, "[null]"); + ValidateBetween<StringType>(R"(["aaa", "aaaa", "ccc", "z"])", l, r, + R"([false, false, true, true])"); + ValidateBetween<StringType>(R"(["a", "aaaa", "c", "z"])", l, r, + R"([false, false, true, true])"); + ValidateBetween<StringType>(R"(["a", "aaaa", "fff", "zzzz"])", l, r, + R"([false, false, true, false])"); + ValidateBetween<StringType>(R"(["abd", null, null, "zzx"])", l, r, + R"([true, null, null, true])"); +} + +TEST(TestSimpleBetweenKernel, SimpleTimestampTest) { + using ScalarType = typename TypeTraits<TimestampType>::ScalarType; + auto checkTimestampArray = [](std::shared_ptr<DataType> type, const char* input_str, + const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(type, input_str); + auto expected = ArrayFromJSON(boolean(), expected_str); + ValidateBetween<TimestampType>(value, lhs, rhs, expected); + }; + auto unit = TimeUnit::SECOND; + auto l = Datum(std::make_shared<ScalarType>(923184000, timestamp(unit))); + auto r = Datum(std::make_shared<ScalarType>(1602032602, timestamp(unit))); + checkTimestampArray(timestamp(unit), "[]", l, r, "[]"); + checkTimestampArray(timestamp(unit), "[null]", l, r, "[null]"); + checkTimestampArray(timestamp(unit), R"(["1970-01-01","2000-02-29","1900-02-28"])", l, + r, "[false,true,false]"); + checkTimestampArray(timestamp(unit), R"(["1970-01-01","2000-02-29","2004-02-28"])", l, + r, "[false,true,true]"); + checkTimestampArray(timestamp(unit), R"(["2018-01-01","1999-04-04","1900-02-28"])", l, + r, "[true,false,false]"); +} + +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayArrayArray) { + ValidateBetween<TypeParam>( + "[]", ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[]"), "[]"); + ValidateBetween<TypeParam>( + "[null]", ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[null]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[null]"), "[null]"); + ValidateBetween<TypeParam>( + "[1,1,2,2,2]", + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[0,0,1,3,3]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[10,10,2,5,5]"), + "[true,true,false,false,false]"); + ValidateBetween<TypeParam>( + "[1,1,2,2,2,2]", + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[0,0,1,null,3,3]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[10,10,2,2,5,5]"), + "[true,true,false,null,false,false]"); +} + +TEST(TestSimpleBetweenKernel, StringArrayArrayArrayTest) { + ValidateBetween<StringType>( + R"(["david","hello","world"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["adam","hi","whirl"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["robert","goeiemoreen","whirlwind"])"), + "[true, false, false]"); + ValidateBetween<StringType>( + R"(["x","a","f"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["w","a","e"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["z","a","g"])"), + "[true, false, true]"); + ValidateBetween<StringType>( + R"(["block","bit","binary"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["bit","nibble","ternary"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["word","d","xyz"])"), + "[true, false, false]"); + ValidateBetween<StringType>(R"(["Ayumi","アユミ","王梦莹"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["たなか","あゆみ","歩美"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["李平之","田中","たなか"])"), + "[false, true, false]"); +} Review comment: Do we need to add `null` in `lhs` and/or `rhs` cases? ########## File path: cpp/src/arrow/compute/kernels/scalar_compare_benchmark.cc ########## @@ -77,5 +77,42 @@ BENCHMARK(GreaterArrayScalarInt64)->Apply(RegressionSetArgs); BENCHMARK(GreaterArrayArrayString)->Apply(RegressionSetArgs); BENCHMARK(GreaterArrayScalarString)->Apply(RegressionSetArgs); +template <typename Type> +static void BetweenScalarArrayScalar(benchmark::State& state) { + RegressionArgs args(state, /*size_is_bytes=*/false); + auto ty = TypeTraits<Type>::type_singleton(); + auto rand = random::RandomArrayGenerator(kSeed); + auto array = rand.ArrayOf(ty, args.size, args.null_proportion); + auto scalar_left = *rand.ArrayOf(ty, 1, 0)->GetScalar(0); + auto scalar_right = *rand.ArrayOf(ty, 1, .0)->GetScalar(0); Review comment: ```suggestion auto scalar_right = *rand.ArrayOf(ty, 1, 0)->GetScalar(0); ``` ########## File path: cpp/src/arrow/compute/kernels/scalar_compare_test.cc ########## @@ -1850,5 +1851,154 @@ TEST(TestMaxElementWiseMinElementWise, CommonTemporal) { ResultWith(ScalarFromJSON(date64(), "86400000"))); } +template <typename ArrowType> +static void ValidateBetween(const Datum& val, const Datum& lhs, const Datum& rhs, + const Datum& expected) { + ASSERT_OK_AND_ASSIGN(Datum result, Between(val, lhs, rhs)); + AssertArraysEqual(*expected.make_array(), *result.make_array(), + /*verbose=*/true); +} + +template <typename ArrowType> +static void ValidateBetween(const char* value_str, const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<ArrowType>(value, lhs, rhs, expected); +} + +template <> +void ValidateBetween<StringType>(const char* value_str, const Datum& lhs, + const Datum& rhs, const char* expected_str) { + auto value = ArrayFromJSON(utf8(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<StringType>(value, lhs, rhs, expected); +} + +template <typename ArrowType> +class TestNumericBetweenKernel : public ::testing::Test {}; + +TYPED_TEST_SUITE(TestNumericBetweenKernel, NumericArrowTypes); +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar) { + using ScalarType = typename TypeTraits<TypeParam>::ScalarType; + using CType = typename TypeTraits<TypeParam>::CType; + + Datum zero(std::make_shared<ScalarType>(CType(0))); + Datum four(std::make_shared<ScalarType>(CType(4))); + ValidateBetween<TypeParam>("[]", zero, four, "[]"); + ValidateBetween<TypeParam>("[null]", zero, four, "[null]"); + ValidateBetween<TypeParam>("[0,0,1,1,2,2]", zero, four, "[0,0,1,1,1,1]"); + ValidateBetween<TypeParam>("[0,1,2,3,4,5]", zero, four, "[0,1,1,1,0,0]"); + ValidateBetween<TypeParam>("[5,4,3,2,1,0]", zero, four, "[0,0,1,1,1,0]"); + ValidateBetween<TypeParam>("[null,0,1,1]", zero, four, "[null,0,1,1]"); +} + +TEST(TestSimpleBetweenKernel, SimpleStringTest) { + using ScalarType = typename TypeTraits<StringType>::ScalarType; + auto l = Datum(std::make_shared<ScalarType>("abc")); + auto r = Datum(std::make_shared<ScalarType>("zzz")); + ValidateBetween<StringType>("[]", l, r, "[]"); + ValidateBetween<StringType>("[null]", l, r, "[null]"); + ValidateBetween<StringType>(R"(["aaa", "aaaa", "ccc", "z"])", l, r, + R"([false, false, true, true])"); + ValidateBetween<StringType>(R"(["a", "aaaa", "c", "z"])", l, r, + R"([false, false, true, true])"); + ValidateBetween<StringType>(R"(["a", "aaaa", "fff", "zzzz"])", l, r, + R"([false, false, true, false])"); + ValidateBetween<StringType>(R"(["abd", null, null, "zzx"])", l, r, + R"([true, null, null, true])"); +} + +TEST(TestSimpleBetweenKernel, SimpleTimestampTest) { + using ScalarType = typename TypeTraits<TimestampType>::ScalarType; + auto checkTimestampArray = [](std::shared_ptr<DataType> type, const char* input_str, + const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(type, input_str); + auto expected = ArrayFromJSON(boolean(), expected_str); + ValidateBetween<TimestampType>(value, lhs, rhs, expected); + }; + auto unit = TimeUnit::SECOND; + auto l = Datum(std::make_shared<ScalarType>(923184000, timestamp(unit))); + auto r = Datum(std::make_shared<ScalarType>(1602032602, timestamp(unit))); + checkTimestampArray(timestamp(unit), "[]", l, r, "[]"); + checkTimestampArray(timestamp(unit), "[null]", l, r, "[null]"); + checkTimestampArray(timestamp(unit), R"(["1970-01-01","2000-02-29","1900-02-28"])", l, + r, "[false,true,false]"); + checkTimestampArray(timestamp(unit), R"(["1970-01-01","2000-02-29","2004-02-28"])", l, + r, "[false,true,true]"); + checkTimestampArray(timestamp(unit), R"(["2018-01-01","1999-04-04","1900-02-28"])", l, + r, "[true,false,false]"); +} + +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayArrayArray) { Review comment: Could you move this to right after `TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar)`? ########## File path: cpp/src/arrow/compute/kernels/codegen_internal.h ########## @@ -982,6 +1021,407 @@ template <typename OutType, typename ArgType, typename Op> using ScalarBinaryNotNullStatefulEqualTypes = ScalarBinaryNotNullStateful<OutType, ArgType, ArgType, Op>; +// A kernel exec generator for ternary functions that addresses both array and +// scalar inputs and dispatches input iteration and output writing to other +// templates +// +// This template executes the operator even on the data behind null values, +// therefore it is generally only suitable for operators that are safe to apply +// even on the null slot values. +// +// The "Op" functor should have the form +// +// struct Op { +// template <typename OutValue, typename Arg0Value, typename Arg1Value, typename +// Arg2Value> static OutValue Call(KernelContext* ctx, Arg0Value arg0, Arg1Value arg1, +// Arg2Value arg2, Status *st) { Review comment: Could you format this? This is difficult to read. ########## File path: cpp/src/arrow/compute/kernels/scalar_compare_test.cc ########## @@ -1850,5 +1851,154 @@ TEST(TestMaxElementWiseMinElementWise, CommonTemporal) { ResultWith(ScalarFromJSON(date64(), "86400000"))); } +template <typename ArrowType> +static void ValidateBetween(const Datum& val, const Datum& lhs, const Datum& rhs, + const Datum& expected) { + ASSERT_OK_AND_ASSIGN(Datum result, Between(val, lhs, rhs)); + AssertArraysEqual(*expected.make_array(), *result.make_array(), + /*verbose=*/true); +} + +template <typename ArrowType> +static void ValidateBetween(const char* value_str, const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<ArrowType>(value, lhs, rhs, expected); +} + +template <> +void ValidateBetween<StringType>(const char* value_str, const Datum& lhs, + const Datum& rhs, const char* expected_str) { + auto value = ArrayFromJSON(utf8(), value_str); + auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str); + ValidateBetween<StringType>(value, lhs, rhs, expected); +} + +template <typename ArrowType> +class TestNumericBetweenKernel : public ::testing::Test {}; + +TYPED_TEST_SUITE(TestNumericBetweenKernel, NumericArrowTypes); +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar) { + using ScalarType = typename TypeTraits<TypeParam>::ScalarType; + using CType = typename TypeTraits<TypeParam>::CType; + + Datum zero(std::make_shared<ScalarType>(CType(0))); + Datum four(std::make_shared<ScalarType>(CType(4))); + ValidateBetween<TypeParam>("[]", zero, four, "[]"); + ValidateBetween<TypeParam>("[null]", zero, four, "[null]"); + ValidateBetween<TypeParam>("[0,0,1,1,2,2]", zero, four, "[0,0,1,1,1,1]"); + ValidateBetween<TypeParam>("[0,1,2,3,4,5]", zero, four, "[0,1,1,1,0,0]"); + ValidateBetween<TypeParam>("[5,4,3,2,1,0]", zero, four, "[0,0,1,1,1,0]"); + ValidateBetween<TypeParam>("[null,0,1,1]", zero, four, "[null,0,1,1]"); +} + +TEST(TestSimpleBetweenKernel, SimpleStringTest) { + using ScalarType = typename TypeTraits<StringType>::ScalarType; + auto l = Datum(std::make_shared<ScalarType>("abc")); + auto r = Datum(std::make_shared<ScalarType>("zzz")); + ValidateBetween<StringType>("[]", l, r, "[]"); + ValidateBetween<StringType>("[null]", l, r, "[null]"); + ValidateBetween<StringType>(R"(["aaa", "aaaa", "ccc", "z"])", l, r, + R"([false, false, true, true])"); + ValidateBetween<StringType>(R"(["a", "aaaa", "c", "z"])", l, r, + R"([false, false, true, true])"); + ValidateBetween<StringType>(R"(["a", "aaaa", "fff", "zzzz"])", l, r, + R"([false, false, true, false])"); + ValidateBetween<StringType>(R"(["abd", null, null, "zzx"])", l, r, + R"([true, null, null, true])"); +} + +TEST(TestSimpleBetweenKernel, SimpleTimestampTest) { + using ScalarType = typename TypeTraits<TimestampType>::ScalarType; + auto checkTimestampArray = [](std::shared_ptr<DataType> type, const char* input_str, + const Datum& lhs, const Datum& rhs, + const char* expected_str) { + auto value = ArrayFromJSON(type, input_str); + auto expected = ArrayFromJSON(boolean(), expected_str); + ValidateBetween<TimestampType>(value, lhs, rhs, expected); + }; + auto unit = TimeUnit::SECOND; + auto l = Datum(std::make_shared<ScalarType>(923184000, timestamp(unit))); + auto r = Datum(std::make_shared<ScalarType>(1602032602, timestamp(unit))); + checkTimestampArray(timestamp(unit), "[]", l, r, "[]"); + checkTimestampArray(timestamp(unit), "[null]", l, r, "[null]"); + checkTimestampArray(timestamp(unit), R"(["1970-01-01","2000-02-29","1900-02-28"])", l, + r, "[false,true,false]"); + checkTimestampArray(timestamp(unit), R"(["1970-01-01","2000-02-29","2004-02-28"])", l, + r, "[false,true,true]"); + checkTimestampArray(timestamp(unit), R"(["2018-01-01","1999-04-04","1900-02-28"])", l, + r, "[true,false,false]"); +} + +TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayArrayArray) { + ValidateBetween<TypeParam>( + "[]", ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[]"), "[]"); + ValidateBetween<TypeParam>( + "[null]", ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[null]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[null]"), "[null]"); + ValidateBetween<TypeParam>( + "[1,1,2,2,2]", + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[0,0,1,3,3]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[10,10,2,5,5]"), + "[true,true,false,false,false]"); + ValidateBetween<TypeParam>( + "[1,1,2,2,2,2]", + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[0,0,1,null,3,3]"), + ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[10,10,2,2,5,5]"), + "[true,true,false,null,false,false]"); +} + +TEST(TestSimpleBetweenKernel, StringArrayArrayArrayTest) { + ValidateBetween<StringType>( + R"(["david","hello","world"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["adam","hi","whirl"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["robert","goeiemoreen","whirlwind"])"), + "[true, false, false]"); + ValidateBetween<StringType>( + R"(["x","a","f"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["w","a","e"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["z","a","g"])"), + "[true, false, true]"); + ValidateBetween<StringType>( + R"(["block","bit","binary"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["bit","nibble","ternary"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["word","d","xyz"])"), + "[true, false, false]"); + ValidateBetween<StringType>(R"(["Ayumi","アユミ","王梦莹"])", + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["たなか","あゆみ","歩美"])"), + ArrayFromJSON(TypeTraits<StringType>::type_singleton(), + R"(["李平之","田中","たなか"])"), + "[false, true, false]"); +} + +TEST(TestSimpleBetweenKernel, TimestampArrayArrayArrayTest) { Review comment: Do we also need different time unit case? ########## File path: cpp/src/arrow/compute/kernels/codegen_internal.h ########## @@ -982,6 +1021,407 @@ template <typename OutType, typename ArgType, typename Op> using ScalarBinaryNotNullStatefulEqualTypes = ScalarBinaryNotNullStateful<OutType, ArgType, ArgType, Op>; +// A kernel exec generator for ternary functions that addresses both array and +// scalar inputs and dispatches input iteration and output writing to other +// templates +// +// This template executes the operator even on the data behind null values, +// therefore it is generally only suitable for operators that are safe to apply +// even on the null slot values. +// +// The "Op" functor should have the form +// +// struct Op { +// template <typename OutValue, typename Arg0Value, typename Arg1Value, typename +// Arg2Value> static OutValue Call(KernelContext* ctx, Arg0Value arg0, Arg1Value arg1, +// Arg2Value arg2, Status *st) { +// // implementation +// // NOTE: "status" should only be populated with errors, +// // leave it unmodified to indicate Status::OK() +// } +// }; +template <typename OutType, typename Arg0Type, typename Arg1Type, typename Arg2Type, + typename Op> +struct ScalarTernary { + using OutValue = typename GetOutputType<OutType>::T; + using Arg0Value = typename GetViewType<Arg0Type>::T; + using Arg1Value = typename GetViewType<Arg1Type>::T; + using Arg2Value = typename GetViewType<Arg2Type>::T; + + static Status ArrayArrayArray(KernelContext* ctx, const ArrayData& arg0, + const ArrayData& arg1, const ArrayData& arg2, + Datum* out) { + Status st = Status::OK(); + ArrayIterator<Arg0Type> arg0_it(arg0); + ArrayIterator<Arg1Type> arg1_it(arg1); + ArrayIterator<Arg2Type> arg2_it(arg2); + RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue { + return Op::template Call<OutValue, Arg0Value, Arg1Value, Arg2Value>( + ctx, arg0_it(), arg1_it(), arg2_it(), &st); + })); + return st; + } + + static Status ArrayArrayScalar(KernelContext* ctx, const ArrayData& arg0, + const ArrayData& arg1, const Scalar& arg2, Datum* out) { + Status st = Status::OK(); + ArrayIterator<Arg0Type> arg0_it(arg0); + ArrayIterator<Arg1Type> arg1_it(arg1); + auto arg2_val = UnboxScalar<Arg2Type>::Unbox(arg2); + RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue { + return Op::template Call<OutValue, Arg0Value, Arg1Value, Arg2Value>( + ctx, arg0_it(), arg1_it(), arg2_val, &st); + })); + return st; + } + + static Status ArrayScalarArray(KernelContext* ctx, const ArrayData& arg0, + const Scalar& arg1, const ArrayData& arg2, Datum* out) { + Status st = Status::OK(); + ArrayIterator<Arg0Type> arg0_it(arg0); + auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1); + ArrayIterator<Arg2Type> arg2_it(arg2); + RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue { + return Op::template Call<OutValue, Arg0Value, Arg1Value, Arg2Value>( + ctx, arg0_it(), arg1_val, arg2_it(), &st); + })); + return st; + } + + static Status ScalarArrayArray(KernelContext* ctx, const Scalar& arg0, + const ArrayData& arg1, const ArrayData& arg2, + Datum* out) { + Status st = Status::OK(); + auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0); + ArrayIterator<Arg1Type> arg1_it(arg1); + ArrayIterator<Arg2Type> arg2_it(arg2); + RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue { + return Op::template Call<OutValue, Arg0Value, Arg1Value, Arg2Value>( + ctx, arg0_val, arg1_it(), arg2_it(), &st); + })); + return st; + } + + static Status ArrayScalarScalar(KernelContext* ctx, const ArrayData& arg0, + const Scalar& arg1, const Scalar& arg2, Datum* out) { + Status st = Status::OK(); + ArrayIterator<Arg0Type> arg0_it(arg0); + auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1); + auto arg2_val = UnboxScalar<Arg2Type>::Unbox(arg2); + RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue { + return Op::template Call<OutValue, Arg0Value, Arg1Value, Arg2Value>( + ctx, arg0_it(), arg1_val, arg2_val, &st); + })); + return st; + } + + static Status ScalarScalarArray(KernelContext* ctx, const Scalar& arg0, + const Scalar& arg1, const ArrayData& arg2, Datum* out) { + Status st = Status::OK(); + auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0); + auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1); + ArrayIterator<Arg2Type> arg2_it(arg2); + RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue { + return Op::template Call<OutValue, Arg0Value, Arg1Value, Arg2Value>( + ctx, arg0_val, arg1_val, arg2_it(), &st); + })); + return st; + } + + static Status ScalarArrayScalar(KernelContext* ctx, const Scalar& arg0, + const ArrayData& arg1, const Scalar& arg2, Datum* out) { + Status st = Status::OK(); + auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0); + ArrayIterator<Arg1Type> arg1_it(arg1); + auto arg2_val = UnboxScalar<Arg2Type>::Unbox(arg2); + RETURN_NOT_OK(OutputAdapter<OutType>::Write(ctx, out, [&]() -> OutValue { + return Op::template Call<OutValue, Arg0Value, Arg1Value, Arg2Value>( + ctx, arg0_val, arg1_it(), arg2_val, &st); + })); + return st; + } + + static Status ScalarScalarScalar(KernelContext* ctx, const Scalar& arg0, + const Scalar& arg1, const Scalar& arg2, Datum* out) { + Status st = Status::OK(); + if (out->scalar()->is_valid) { + auto arg0_val = UnboxScalar<Arg0Type>::Unbox(arg0); + auto arg1_val = UnboxScalar<Arg1Type>::Unbox(arg1); + auto arg2_val = UnboxScalar<Arg2Type>::Unbox(arg2); + BoxScalar<OutType>::Box( + Op::template Call<OutValue, Arg0Value, Arg1Value, Arg2Value>( + ctx, arg0_val, arg1_val, arg2_val, &st), + out->scalar().get()); + } + return st; + } + + static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { + if (batch[0].kind() == Datum::ARRAY) { + if (batch[1].kind() == Datum::ARRAY) { + if (batch[2].kind() == Datum::ARRAY) { + return ArrayArrayArray(ctx, *batch[0].array(), *batch[1].array(), + *batch[2].array(), out); + } else { + return ArrayArrayScalar(ctx, *batch[0].array(), *batch[1].array(), + *batch[2].scalar(), out); + } + } else { + if (batch[2].kind() == Datum::ARRAY) { + return ArrayScalarArray(ctx, *batch[0].array(), *batch[1].scalar(), + *batch[2].array(), out); + } else { + return ArrayScalarScalar(ctx, *batch[0].array(), *batch[1].scalar(), + *batch[2].scalar(), out); + } + } + } else { + if (batch[1].kind() == Datum::ARRAY) { + if (batch[2].kind() == Datum::ARRAY) { + return ScalarArrayArray(ctx, *batch[0].scalar(), *batch[1].array(), + *batch[2].array(), out); + } else { + return ScalarArrayScalar(ctx, *batch[0].scalar(), *batch[1].array(), + *batch[2].scalar(), out); + } + } else { + if (batch[2].kind() == Datum::ARRAY) { + return ScalarScalarArray(ctx, *batch[0].scalar(), *batch[1].scalar(), + *batch[2].array(), out); + } else { + return ScalarScalarScalar(ctx, *batch[0].scalar(), *batch[1].scalar(), + *batch[2].scalar(), out); + } + } + } + } +}; + +// An alternative to ScalarTernary that Applies a scalar operation with state on +// only the value pairs which are not-null in both arrays +template <typename OutType, typename Arg0Type, typename Arg1Type, typename Arg2Type, + typename Op> +struct ScalarTernaryNotNullStateful { Review comment: It seems that this is not used in this pull request? Should we remove this or use this (by adding support for decimal types)? ########## File path: cpp/src/arrow/util/bit_block_counter.h ########## @@ -586,5 +961,144 @@ static void VisitTwoBitBlocksVoid(const std::shared_ptr<Buffer>& left_bitmap_buf } } +template <typename VisitNotNull, typename VisitNull> +static void VisitThreeBitBlocksVoid( + const std::shared_ptr<Buffer>& left_bitmap_buf, int64_t left_offset, + const std::shared_ptr<Buffer>& mid_bitmap_buf, int64_t mid_offset, + const std::shared_ptr<Buffer>& right_bitmap_buf, int64_t right_offset, int64_t length, + VisitNotNull&& visit_not_null, VisitNull&& visit_null) { + if (((left_bitmap_buf == NULLPTR) && (mid_bitmap_buf == NULLPTR)) || + ((mid_bitmap_buf == NULLPTR) && (right_bitmap_buf == NULLPTR)) || + ((left_bitmap_buf == NULLPTR) && (right_bitmap_buf == NULLPTR))) { + // At most one bitmap is present + if ((left_bitmap_buf == NULLPTR) && (mid_bitmap_buf == NULLPTR)) { + return VisitBitBlocksVoid(right_bitmap_buf, right_offset, length, + std::forward<VisitNotNull>(visit_not_null), + std::forward<VisitNull>(visit_null)); + } else if ((mid_bitmap_buf == NULLPTR) && (right_bitmap_buf == NULLPTR)) { + return VisitBitBlocksVoid(left_bitmap_buf, left_offset, length, + std::forward<VisitNotNull>(visit_not_null), + std::forward<VisitNull>(visit_null)); + } else { + return VisitBitBlocksVoid(mid_bitmap_buf, mid_offset, length, + std::forward<VisitNotNull>(visit_not_null), + std::forward<VisitNull>(visit_null)); + } + } + // Two bitmaps are present + if (left_bitmap_buf == NULLPTR) { + const uint8_t* mid_bitmap = mid_bitmap_buf->data(); + const uint8_t* right_bitmap = right_bitmap_buf->data(); + BinaryBitBlockCounter bit_counter(mid_bitmap, mid_offset, right_bitmap, right_offset, + length); + int64_t position = 0; + while (position < length) { + BitBlockCount block = bit_counter.NextAndWord(); + if (block.AllSet()) { + for (int64_t i = 0; i < block.length; ++i, ++position) { + visit_not_null(position); + } + } else if (block.NoneSet()) { + for (int64_t i = 0; i < block.length; ++i, ++position) { + visit_null(); + } + } else { + for (int64_t i = 0; i < block.length; ++i, ++position) { + if (bit_util::GetBit(mid_bitmap, mid_offset + position) && + bit_util::GetBit(right_bitmap, right_offset + position)) { + visit_not_null(position); + } else { + visit_null(); + } + } + } + } + } else if (mid_bitmap_buf == NULLPTR) { + const uint8_t* left_bitmap = left_bitmap_buf->data(); + const uint8_t* right_bitmap = right_bitmap_buf->data(); + BinaryBitBlockCounter bit_counter(left_bitmap, left_offset, right_bitmap, + right_offset, length); + int64_t position = 0; + while (position < length) { + BitBlockCount block = bit_counter.NextAndWord(); + if (block.AllSet()) { + for (int64_t i = 0; i < block.length; ++i, ++position) { + visit_not_null(position); + } + } else if (block.NoneSet()) { + for (int64_t i = 0; i < block.length; ++i, ++position) { + visit_null(); + } + } else { + for (int64_t i = 0; i < block.length; ++i, ++position) { + if (bit_util::GetBit(left_bitmap, left_offset + position) && + bit_util::GetBit(right_bitmap, right_offset + position)) { + visit_not_null(position); + } else { + visit_null(); + } + } + } + } + } else { + const uint8_t* left_bitmap = left_bitmap_buf->data(); Review comment: Is this clause correct? It seems that `right_bitmap_buf` may be ignored. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org