pitrou commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528714541
##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -213,20 +214,33 @@ bool ExecBatchIterator::Next(ExecBatch* batch) {
namespace {
-bool ArrayHasNulls(const ArrayData& data) {
- // As discovered in ARROW-8863 (and not only for that reason)
- // ArrayData::null_count can -1 even when buffers[0] is nullptr. So we check
- // for both cases (nullptr means no nulls, or null_count already computed)
- if (data.type->id() == Type::NA) {
- return true;
- } else if (data.buffers[0] == nullptr) {
- return false;
- } else {
+struct NullGeneralization {
+ enum type { NONE, ALL_VALID, ALL_NULL };
Review comment:
Rather than "NONE", "UNKNOWN" (or "PERHAPS_NULL") sounds less confusing
perhaps.
##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
##########
@@ -27,185 +27,114 @@
#include "arrow/compute/kernels/test_util.h"
#include "arrow/testing/gtest_common.h"
#include "arrow/testing/gtest_util.h"
+#include "arrow/util/checked_cast.h"
namespace arrow {
+
+using internal::checked_pointer_cast;
+
namespace compute {
-using BinaryKernelFunc =
- std::function<Result<Datum>(const Datum&, const Datum&, ExecContext*)>;
-
-class TestBooleanKernel : public TestBase {
- public:
- void TestArrayBinary(const BinaryKernelFunc& kernel, const
std::shared_ptr<Array>& left,
- const std::shared_ptr<Array>& right,
- const std::shared_ptr<Array>& expected) {
- ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
- ASSERT_EQ(Datum::ARRAY, result.kind());
- std::shared_ptr<Array> result_array = result.make_array();
- ASSERT_OK(result_array->ValidateFull());
- AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-
- ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
- ASSERT_EQ(Datum::ARRAY, result.kind());
- result_array = result.make_array();
- ASSERT_OK(result_array->ValidateFull());
- AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
- }
+void CheckBooleanScalarArrayBinary(std::string func_name, Datum array) {
+ for (std::shared_ptr<Scalar> scalar :
+ {std::make_shared<BooleanScalar>(),
std::make_shared<BooleanScalar>(true),
+ std::make_shared<BooleanScalar>(false)}) {
+ ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func_name, {Datum(scalar),
array}));
- void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,
- const std::shared_ptr<ChunkedArray>& left,
- const std::shared_ptr<ChunkedArray>& right,
- const std::shared_ptr<ChunkedArray>& expected) {
- ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
- ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
- std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
- AssertChunkedEquivalent(*expected, *result_ca);
-
- ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
- ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
- result_ca = result.chunked_array();
- AssertChunkedEquivalent(*expected, *result_ca);
- }
+ ASSERT_OK_AND_ASSIGN(auto constant_array,
+ MakeArrayFromScalar(*scalar, array.length()));
- void TestBinaryKernel(const BinaryKernelFunc& kernel,
- const std::shared_ptr<Array>& left,
- const std::shared_ptr<Array>& right,
- const std::shared_ptr<Array>& expected) {
- TestArrayBinary(kernel, left, right, expected);
- TestArrayBinary(kernel, left->Slice(1), right->Slice(1),
expected->Slice(1));
-
- // ChunkedArray
- auto cleft = std::make_shared<ChunkedArray>(ArrayVector{left,
left->Slice(1)});
- auto cright = std::make_shared<ChunkedArray>(ArrayVector{right,
right->Slice(1)});
- auto cexpected =
- std::make_shared<ChunkedArray>(ArrayVector{expected,
expected->Slice(1)});
- TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-
- // ChunkedArray with different chunks
- cleft = std::make_shared<ChunkedArray>(ArrayVector{
- left->Slice(0, 1), left->Slice(1), left->Slice(1, 1), left->Slice(2)});
- TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
- }
+ ASSERT_OK_AND_ASSIGN(Datum expected,
+ CallFunction(func_name, {Datum(constant_array),
array}));
+ AssertDatumsEqual(expected, actual);
- void TestBinaryKernelPropagate(const BinaryKernelFunc& kernel,
- const std::vector<bool>& left,
- const std::vector<bool>& right,
- const std::vector<bool>& expected,
- const std::vector<bool>& expected_nulls) {
- auto type = boolean();
- TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, {}),
- _MakeArray<BooleanType, bool>(type, right, {}),
- _MakeArray<BooleanType, bool>(type, expected, {}));
-
- TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, left),
- _MakeArray<BooleanType, bool>(type, right, right),
- _MakeArray<BooleanType, bool>(type, expected,
expected_nulls));
+ ASSERT_OK_AND_ASSIGN(actual, CallFunction(func_name, {array,
Datum(scalar)}));
+ ASSERT_OK_AND_ASSIGN(expected,
+ CallFunction(func_name, {array,
Datum(constant_array)}));
+ AssertDatumsEqual(expected, actual);
}
-
- protected:
- ExecContext ctx_;
-};
-
-TEST_F(TestBooleanKernel, Invert) {
- std::vector<bool> values1 = {true, false, true, false};
- std::vector<bool> values2 = {false, true, false, true};
-
- auto type = boolean();
- auto a1 = _MakeArray<BooleanType, bool>(type, values1, {true, true, true,
false});
- auto a2 = _MakeArray<BooleanType, bool>(type, values2, {true, true, true,
false});
-
- // Plain array
- ASSERT_OK_AND_ASSIGN(Datum result, Invert(a1));
- ASSERT_EQ(Datum::ARRAY, result.kind());
- ASSERT_ARRAYS_EQUAL(*a2, *result.make_array());
-
- // Array with offset
- ASSERT_OK_AND_ASSIGN(result, Invert(a1->Slice(1)));
- ASSERT_EQ(Datum::ARRAY, result.kind());
- ASSERT_ARRAYS_EQUAL(*a2->Slice(1), *result.make_array());
-
- // ChunkedArray
- std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
- auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
- std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
- auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
- ASSERT_OK_AND_ASSIGN(result, Invert(ca1));
- ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
- std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-
- // Contiguous preallocation, so a single output chunk even though there were
- // two input chunks
- ASSERT_EQ(1, result_ca->num_chunks());
- AssertChunkedEquivalent(*ca2, *result_ca);
}
-TEST_F(TestBooleanKernel, InvertEmptyArray) {
- std::vector<std::shared_ptr<Buffer>> data_buffers(2);
- Datum input;
- input.value = ArrayData::Make(boolean(), 0 /* length */,
std::move(data_buffers),
- 0 /* null_count */);
-
- ASSERT_OK_AND_ASSIGN(Datum result, Invert(input));
- ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, Invert) {
+ auto arr =
+ ArrayFromJSON(boolean(), "[true, false, true, null, false, true, false,
null]");
+ auto expected =
+ ArrayFromJSON(boolean(), "[false, true, false, null, true, false, true,
null]");
+ CheckScalarUnary("invert", arr, expected);
}
-TEST_F(TestBooleanKernel, BinaryOpOnEmptyArray) {
- auto type = boolean();
- std::vector<std::shared_ptr<Buffer>> data_buffers(2);
- Datum input;
- input.value = ArrayData::Make(boolean(), 0 /* length */,
std::move(data_buffers),
- 0 /* null_count */);
-
- ASSERT_OK_AND_ASSIGN(Datum result, And(input, input));
- // Result should be empty as well.
- ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, And) {
+ auto left = ArrayFromJSON(boolean(), " [true, true, true, false, false,
null]");
+ auto right = ArrayFromJSON(boolean(), " [true, false, null, false, null,
null]");
+ auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, null,
null]");
+ // CheckScalarBinary("and", left, right, expected);
+ CheckBooleanScalarArrayBinary("and", left);
}
-TEST_F(TestBooleanKernel, And) {
- std::vector<bool> values1 = {true, false, true, false, true, true};
- std::vector<bool> values2 = {true, true, false, false, true, false};
- std::vector<bool> values3 = {true, false, false, false, true, false};
- TestBinaryKernelPropagate(And, values1, values2, values3, values3);
+TEST(TestBooleanKernel, Or) {
+ auto left = ArrayFromJSON(boolean(), " [true, true, true, false, false,
null]");
+ auto right = ArrayFromJSON(boolean(), " [true, false, null, false, null,
null]");
+ auto expected = ArrayFromJSON(boolean(), "[true, true, null, false, null,
null]");
+ CheckScalarBinary("or", left, right, expected);
+ CheckBooleanScalarArrayBinary("or", left);
}
-TEST_F(TestBooleanKernel, Or) {
- std::vector<bool> values1 = {true, false, true, false, true, true};
- std::vector<bool> values2 = {true, true, false, false, true, false};
- std::vector<bool> values3 = {true, true, true, false, true, true};
- std::vector<bool> values3_nulls = {true, false, false, false, true, false};
- TestBinaryKernelPropagate(Or, values1, values2, values3, values3_nulls);
+TEST(TestBooleanKernel, Xor) {
+ auto left = ArrayFromJSON(boolean(), " [true, true, true, false, false,
null]");
+ auto right = ArrayFromJSON(boolean(), " [true, false, null, false, null,
null]");
+ auto expected = ArrayFromJSON(boolean(), "[false, true, null, false, null,
null]");
+ CheckScalarBinary("xor", left, right, expected);
+ CheckBooleanScalarArrayBinary("xor", left);
}
-TEST_F(TestBooleanKernel, Xor) {
- std::vector<bool> values1 = {true, false, true, false, true, true};
- std::vector<bool> values2 = {true, true, false, false, true, false};
- std::vector<bool> values3 = {false, true, true, false, false, true};
- std::vector<bool> values3_nulls = {true, false, false, false, true, false};
- TestBinaryKernelPropagate(Xor, values1, values2, values3, values3_nulls);
+TEST(TestBooleanKernel, AndNot) {
+ auto left = ArrayFromJSON(
+ boolean(), "[true, true, true, false, false, false, null, null,
null]");
+ auto right = ArrayFromJSON(
+ boolean(), "[true, false, null, true, false, null, true, false,
null]");
+ auto expected = ArrayFromJSON(
+ boolean(), "[false, true, null, false, false, null, null, null,
null]");
+ CheckScalarBinary("and_not", left, right, expected);
+ CheckBooleanScalarArrayBinary("and_not", left);
}
-TEST_F(TestBooleanKernel, KleeneAnd) {
+TEST(TestBooleanKernel, KleeneAnd) {
auto left = ArrayFromJSON(boolean(), " [true, true, true, false, false,
null]");
auto right = ArrayFromJSON(boolean(), " [true, false, null, false, null,
null]");
auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, false,
null]");
- TestBinaryKernel(KleeneAnd, left, right, expected);
+ CheckScalarBinary("and_kleene", left, right, expected);
+ CheckBooleanScalarArrayBinary("and_kleene", left);
left = ArrayFromJSON(boolean(), " [true, true, false, null, null]");
right = ArrayFromJSON(boolean(), " [true, false, false, true, false]");
expected = ArrayFromJSON(boolean(), "[true, false, false, null, false]");
- TestBinaryKernel(KleeneAnd, left, right, expected);
+ CheckScalarBinary("and_kleene", left, right, expected);
+ CheckBooleanScalarArrayBinary("and_kleene", left);
Review comment:
Are you running almost the same tests twice here?
##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -272,79 +287,61 @@ class NullPropagator {
return Status::OK();
}
- Result<bool> ShortCircuitIfAllNull() {
- // An all-null value (scalar null or all-null array) gives us a short
- // circuit opportunity
- bool is_all_null = false;
- std::shared_ptr<Buffer> all_null_bitmap;
+ Status AllNullShortCircuit() {
+ // OK, the output should be all null
+ output_->null_count = output_->length;
+
+ if (bitmap_preallocated_) {
+ BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
+ return Status::OK();
+ }
// Walk all the values with nulls instead of breaking on the first in case
// we find a bitmap that can be reused in the non-preallocated case
- for (const Datum* value : values_with_nulls_) {
- if (value->type()->id() == Type::NA) {
- // No bitmap
- is_all_null = true;
- } else if (value->kind() == Datum::ARRAY) {
- const ArrayData& arr = *value->array();
- if (arr.null_count.load() == arr.length) {
- // Pluck the all null bitmap so we can set it in the output if it was
- // not pre-allocated
- all_null_bitmap = arr.buffers[0];
- is_all_null = true;
- }
- } else {
- // Scalar
- is_all_null = !value->scalar()->is_valid;
+ for (const ArrayData* arr : arrays_with_nulls_) {
+ if (arr->null_count.load() == arr->length && arr->buffers[0] != nullptr)
{
+ // Reuse this all null bitmap
+ output_->buffers[0] = arr->buffers[0];
+ return Status::OK();
}
}
- if (!is_all_null) {
- return false;
- }
- // OK, the output should be all null
- output_->null_count = output_->length;
-
- if (!bitmap_preallocated_ && all_null_bitmap) {
- // If we did not pre-allocate memory, and we observed an all-null bitmap,
- // then we can zero-copy it into the output
- output_->buffers[0] = std::move(all_null_bitmap);
- } else {
- RETURN_NOT_OK(EnsureAllocated());
- BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
- }
- return true;
+ RETURN_NOT_OK(EnsureAllocated());
+ BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
+ return Status::OK();
}
Status PropagateSingle() {
// One array
- const ArrayData& arr = *values_with_nulls_[0]->array();
+ const ArrayData& arr = *arrays_with_nulls_[0];
const std::shared_ptr<Buffer>& arr_bitmap = arr.buffers[0];
// Reuse the null count if it's known
output_->null_count = arr.null_count.load();
if (bitmap_preallocated_) {
CopyBitmap(arr_bitmap->data(), arr.offset, arr.length, bitmap_,
output_->offset);
+ return Status::OK();
+ }
+
+ // Two cases when memory was not pre-allocated:
+ //
+ // * Offset is zero: we reuse the bitmap as is
+ // * Offset is nonzero but a multiple of 8: we can slice the bitmap
+ // * Offset is not a multiple of 8: we must allocate and use CopyBitmap
+ //
+ // Keep in mind that output_->offset is not permitted to be nonzero when
+ // the bitmap is not preallocated, and that precondition is asserted
+ // higher in the call stack.
+ if (arr.offset == 0) {
+ output_->buffers[0] = arr_bitmap;
+ } else if (arr.offset % 8 == 0) {
+ output_->buffers[0] =
+ SliceBuffer(arr_bitmap, arr.offset / 8,
BitUtil::BytesForBits(arr.length));
Review comment:
Would it be useful to add a `SliceBitmap` helper to `bitmap_ops.h`?
##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -427,12 +427,28 @@ void SimpleUnary(KernelContext* ctx, const ExecBatch&
batch, Datum* out) {
//
// static void Call(KernelContext*, const ArrayData& arg0, const ArrayData&
arg1,
// ArrayData* out)
+// static void Call(KernelContext*, const ArrayData& arg0, const Scalar& arg1,
+// ArrayData* out)
+// static void Call(KernelContext*, const Scalar& arg0, const ArrayData& arg1,
+// ArrayData* out)
+// static void Call(KernelContext*, const Scalar& arg0, const Scalar& arg1,
+// Scalar* out)
Review comment:
Mention that heterogenous implementation ((scalar, array) and (array,
scalar)) must broadcast the scalar value?
##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
##########
@@ -27,185 +27,114 @@
#include "arrow/compute/kernels/test_util.h"
#include "arrow/testing/gtest_common.h"
#include "arrow/testing/gtest_util.h"
+#include "arrow/util/checked_cast.h"
namespace arrow {
+
+using internal::checked_pointer_cast;
+
namespace compute {
-using BinaryKernelFunc =
- std::function<Result<Datum>(const Datum&, const Datum&, ExecContext*)>;
-
-class TestBooleanKernel : public TestBase {
- public:
- void TestArrayBinary(const BinaryKernelFunc& kernel, const
std::shared_ptr<Array>& left,
- const std::shared_ptr<Array>& right,
- const std::shared_ptr<Array>& expected) {
- ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
- ASSERT_EQ(Datum::ARRAY, result.kind());
- std::shared_ptr<Array> result_array = result.make_array();
- ASSERT_OK(result_array->ValidateFull());
- AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-
- ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
- ASSERT_EQ(Datum::ARRAY, result.kind());
- result_array = result.make_array();
- ASSERT_OK(result_array->ValidateFull());
- AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
- }
+void CheckBooleanScalarArrayBinary(std::string func_name, Datum array) {
+ for (std::shared_ptr<Scalar> scalar :
+ {std::make_shared<BooleanScalar>(),
std::make_shared<BooleanScalar>(true),
+ std::make_shared<BooleanScalar>(false)}) {
+ ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func_name, {Datum(scalar),
array}));
- void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,
- const std::shared_ptr<ChunkedArray>& left,
- const std::shared_ptr<ChunkedArray>& right,
- const std::shared_ptr<ChunkedArray>& expected) {
- ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
- ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
- std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
- AssertChunkedEquivalent(*expected, *result_ca);
-
- ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
- ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
- result_ca = result.chunked_array();
- AssertChunkedEquivalent(*expected, *result_ca);
- }
+ ASSERT_OK_AND_ASSIGN(auto constant_array,
+ MakeArrayFromScalar(*scalar, array.length()));
- void TestBinaryKernel(const BinaryKernelFunc& kernel,
- const std::shared_ptr<Array>& left,
- const std::shared_ptr<Array>& right,
- const std::shared_ptr<Array>& expected) {
- TestArrayBinary(kernel, left, right, expected);
- TestArrayBinary(kernel, left->Slice(1), right->Slice(1),
expected->Slice(1));
-
- // ChunkedArray
- auto cleft = std::make_shared<ChunkedArray>(ArrayVector{left,
left->Slice(1)});
- auto cright = std::make_shared<ChunkedArray>(ArrayVector{right,
right->Slice(1)});
- auto cexpected =
- std::make_shared<ChunkedArray>(ArrayVector{expected,
expected->Slice(1)});
- TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-
- // ChunkedArray with different chunks
- cleft = std::make_shared<ChunkedArray>(ArrayVector{
- left->Slice(0, 1), left->Slice(1), left->Slice(1, 1), left->Slice(2)});
- TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
- }
+ ASSERT_OK_AND_ASSIGN(Datum expected,
+ CallFunction(func_name, {Datum(constant_array),
array}));
+ AssertDatumsEqual(expected, actual);
- void TestBinaryKernelPropagate(const BinaryKernelFunc& kernel,
- const std::vector<bool>& left,
- const std::vector<bool>& right,
- const std::vector<bool>& expected,
- const std::vector<bool>& expected_nulls) {
- auto type = boolean();
- TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, {}),
- _MakeArray<BooleanType, bool>(type, right, {}),
- _MakeArray<BooleanType, bool>(type, expected, {}));
-
- TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, left),
- _MakeArray<BooleanType, bool>(type, right, right),
- _MakeArray<BooleanType, bool>(type, expected,
expected_nulls));
+ ASSERT_OK_AND_ASSIGN(actual, CallFunction(func_name, {array,
Datum(scalar)}));
+ ASSERT_OK_AND_ASSIGN(expected,
+ CallFunction(func_name, {array,
Datum(constant_array)}));
+ AssertDatumsEqual(expected, actual);
}
-
- protected:
- ExecContext ctx_;
-};
-
-TEST_F(TestBooleanKernel, Invert) {
- std::vector<bool> values1 = {true, false, true, false};
- std::vector<bool> values2 = {false, true, false, true};
-
- auto type = boolean();
- auto a1 = _MakeArray<BooleanType, bool>(type, values1, {true, true, true,
false});
- auto a2 = _MakeArray<BooleanType, bool>(type, values2, {true, true, true,
false});
-
- // Plain array
- ASSERT_OK_AND_ASSIGN(Datum result, Invert(a1));
- ASSERT_EQ(Datum::ARRAY, result.kind());
- ASSERT_ARRAYS_EQUAL(*a2, *result.make_array());
-
- // Array with offset
- ASSERT_OK_AND_ASSIGN(result, Invert(a1->Slice(1)));
- ASSERT_EQ(Datum::ARRAY, result.kind());
- ASSERT_ARRAYS_EQUAL(*a2->Slice(1), *result.make_array());
-
- // ChunkedArray
- std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
- auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
- std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
- auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
- ASSERT_OK_AND_ASSIGN(result, Invert(ca1));
- ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
- std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-
- // Contiguous preallocation, so a single output chunk even though there were
- // two input chunks
- ASSERT_EQ(1, result_ca->num_chunks());
- AssertChunkedEquivalent(*ca2, *result_ca);
}
-TEST_F(TestBooleanKernel, InvertEmptyArray) {
- std::vector<std::shared_ptr<Buffer>> data_buffers(2);
- Datum input;
- input.value = ArrayData::Make(boolean(), 0 /* length */,
std::move(data_buffers),
- 0 /* null_count */);
-
- ASSERT_OK_AND_ASSIGN(Datum result, Invert(input));
- ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, Invert) {
+ auto arr =
+ ArrayFromJSON(boolean(), "[true, false, true, null, false, true, false,
null]");
+ auto expected =
+ ArrayFromJSON(boolean(), "[false, true, false, null, true, false, true,
null]");
+ CheckScalarUnary("invert", arr, expected);
}
-TEST_F(TestBooleanKernel, BinaryOpOnEmptyArray) {
- auto type = boolean();
- std::vector<std::shared_ptr<Buffer>> data_buffers(2);
- Datum input;
- input.value = ArrayData::Make(boolean(), 0 /* length */,
std::move(data_buffers),
- 0 /* null_count */);
-
- ASSERT_OK_AND_ASSIGN(Datum result, And(input, input));
- // Result should be empty as well.
- ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, And) {
+ auto left = ArrayFromJSON(boolean(), " [true, true, true, false, false,
null]");
+ auto right = ArrayFromJSON(boolean(), " [true, false, null, false, null,
null]");
+ auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, null,
null]");
+ // CheckScalarBinary("and", left, right, expected);
Review comment:
Is there a reason this is commented out?
##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -61,40 +59,78 @@ void ComputeKleene(ComputeWord&& compute_word,
KernelContext* ctx, const ArrayDa
++i;
};
- if (right.null_count == 0 || left.null_count == 0) {
- if (left.null_count == 0) {
- // ensure only bitmaps[RIGHT_VALID].buffer might be null
- std::swap(bitmaps[LEFT_VALID], bitmaps[RIGHT_VALID]);
- std::swap(bitmaps[LEFT_DATA], bitmaps[RIGHT_DATA]);
- }
- // override bitmaps[RIGHT_VALID] to make it safe for Visit()
+ if (right.null_count == 0) {
+ // bitmaps[RIGHT_VALID] might be null; override to make it safe for Visit()
bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA];
-
Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0),
words[RIGHT_DATA]);
});
- } else {
+ return;
+ }
+
+ if (left.null_count == 0) {
+ // bitmaps[LEFT_VALID] might be null; override to make it safe for Visit()
+ bitmaps[LEFT_VALID] = bitmaps[LEFT_DATA];
Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
- apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID],
words[RIGHT_DATA]);
+ apply(~uint64_t(0), words[LEFT_DATA], words[RIGHT_VALID],
words[RIGHT_DATA]);
});
+ return;
}
+
+ DCHECK(left.null_count != 0 || right.null_count != 0);
+ Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+ apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID],
words[RIGHT_DATA]);
+ });
+}
+
+inline BooleanScalar InvertScalar(const Scalar& in) {
+ return in.is_valid ? BooleanScalar(!checked_cast<const
BooleanScalar&>(in).value)
+ : BooleanScalar();
+}
+
+inline Bitmap GetBitmap(const ArrayData& arr, int index) {
+ return Bitmap{arr.buffers[index], arr.offset, arr.length};
}
struct Invert {
static void Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
- if (in.is_valid) {
- checked_cast<BooleanScalar*>(out)->value =
- !checked_cast<const BooleanScalar&>(in).value;
- }
+ *checked_cast<BooleanScalar*>(out) = InvertScalar(in);
}
static void Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
- ::arrow::internal::InvertBitmap(in.buffers[1]->data(), in.offset,
in.length,
- out->buffers[1]->mutable_data(),
out->offset);
+ GetBitmap(*out, 1).CopyFromInverted(GetBitmap(in, 1));
}
};
-struct And {
+template <typename Op>
+struct Commutative {
+ static void Call(KernelContext* ctx, const Scalar& left, const ArrayData&
right,
+ ArrayData* out) {
+ Op::Call(ctx, right, left, out);
+ }
+};
+
+struct And : Commutative<And> {
+ using Commutative<And>::Call;
+
+ static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ if (left.is_valid && right.is_valid) {
+ checked_cast<BooleanScalar*>(out)->value =
+ checked_cast<const BooleanScalar&>(left).value &&
+ checked_cast<const BooleanScalar&>(right).value;
+ }
+ }
+
+ static void Call(KernelContext* ctx, const ArrayData& left, const Scalar&
right,
+ ArrayData* out) {
+ if (!right.is_valid) return; // all null case
+
+ return checked_cast<const BooleanScalar&>(right).value
+ ? GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1))
Review comment:
Hmm... Perhaps we can avoid a copy in most cases and just re-use the
bitmap buffer?
(do we need a `BitmapSliceOrCopy` helper?)
##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -156,14 +307,94 @@ struct Xor {
}
};
+struct AndNot {
+ static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ And::Call(ctx, left, InvertScalar(right), out);
+ }
+
+ static void Call(KernelContext* ctx, const Scalar& left, const ArrayData&
right,
+ ArrayData* out) {
+ if (!left.is_valid) return; // all null case
+
+ return checked_cast<const BooleanScalar&>(left).value
+ ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1))
+ : GetBitmap(*out, 1).SetBitsTo(false);
+ }
+
+ static void Call(KernelContext* ctx, const ArrayData& left, const Scalar&
right,
+ ArrayData* out) {
+ And::Call(ctx, left, InvertScalar(right), out);
+ }
+
+ static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData&
right,
+ ArrayData* out) {
+ ::arrow::internal::BitmapAndNot(left.buffers[1]->data(), left.offset,
+ right.buffers[1]->data(), right.offset,
right.length,
+ out->offset,
out->buffers[1]->mutable_data());
+ }
+};
+
+struct KleeneAndNot {
+ static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+ }
+
+ static void Call(KernelContext* ctx, const Scalar& left, const ArrayData&
right,
+ ArrayData* out) {
+ bool left_true = left.is_valid && checked_cast<const
BooleanScalar&>(left).value;
+ bool left_false = left.is_valid && !checked_cast<const
BooleanScalar&>(left).value;
+
+ if (left_false) {
+ return GetBitmap(*out, 0).SetBitsTo(true),
+ GetBitmap(*out, 1).SetBitsTo(false); // all false case
+ }
+
+ if (left_true) {
+ return GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0)),
+ GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
+ }
+
+ // scalar was null: out[i] is valid iff right[i] was true
+ ::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
+ right.buffers[1]->data(), right.offset,
right.length,
+ out->offset, out->buffers[0]->mutable_data());
+ ::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset,
right.length,
+ out->buffers[1]->mutable_data(),
out->offset);
+ }
+
+ static void Call(KernelContext* ctx, const ArrayData& left, const Scalar&
right,
+ ArrayData* out) {
+ KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+ }
+
+ static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData&
right,
+ ArrayData* out) {
+ if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
+ GetBitmap(*out, 0).SetBitsTo(true);
+ return AndNot::Call(ctx, left, right, out);
+ }
+
+ static auto compute_word = [](uint64_t left_true, uint64_t left_false,
+ uint64_t right_true, uint64_t right_false,
+ uint64_t* out_valid, uint64_t* out_data) {
+ *out_data = left_true & right_false;
+ *out_valid = left_false | right_true | (left_true & right_false);
Review comment:
I would expect `~right_false` to be involved here. Am I missing
something? Otherwise, the tests should also be expanded to exercise this code
path.
##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -129,13 +222,51 @@ struct Or {
}
};
-struct KleeneOr {
+struct KleeneOr : Commutative<KleeneOr> {
+ using Commutative<KleeneOr>::Call;
+
+ static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+ Scalar* out) {
+ bool left_true = left.is_valid && checked_cast<const
BooleanScalar&>(left).value;
+ bool left_false = left.is_valid && !checked_cast<const
BooleanScalar&>(left).value;
+
+ bool right_true = right.is_valid && checked_cast<const
BooleanScalar&>(right).value;
+ bool right_false = right.is_valid && !checked_cast<const
BooleanScalar&>(right).value;
+
+ checked_cast<BooleanScalar*>(out)->value = left_true || right_true;
+ out->is_valid = left_true || right_true || (left_false && right_false);
+ }
+
+ static void Call(KernelContext* ctx, const ArrayData& left, const Scalar&
right,
+ ArrayData* out) {
+ bool right_true = right.is_valid && checked_cast<const
BooleanScalar&>(right).value;
+ bool right_false = right.is_valid && !checked_cast<const
BooleanScalar&>(right).value;
+
+ if (right_true) {
+ return GetBitmap(*out, 0).SetBitsTo(true),
+ GetBitmap(*out, 1).SetBitsTo(true); // all true case
+ }
+
+ if (right_false) {
+ return GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0)),
+ GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+ }
+
+ // scalar was null: out[i] is valid iff left[i] was true
+ ::arrow::internal::BitmapAnd(left.buffers[0]->data(), left.offset,
+ left.buffers[1]->data(), left.offset,
left.length,
+ out->offset, out->buffers[0]->mutable_data());
+ ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset,
left.length,
+ out->buffers[1]->mutable_data(),
out->offset);
+ }
+
static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData&
right,
ArrayData* out) {
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
- BitUtil::SetBitsTo(out->buffers[0]->mutable_data(), out->offset,
out->length, true);
+ GetBitmap(*out, 0).SetBitsTo(true);
Review comment:
Should you make the same change in `KleeneAnd::Call(KernelContext*,
const ArrayData&, const ArrayData&, ArrayData*)`?
##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -61,40 +59,78 @@ void ComputeKleene(ComputeWord&& compute_word,
KernelContext* ctx, const ArrayDa
++i;
};
- if (right.null_count == 0 || left.null_count == 0) {
- if (left.null_count == 0) {
- // ensure only bitmaps[RIGHT_VALID].buffer might be null
- std::swap(bitmaps[LEFT_VALID], bitmaps[RIGHT_VALID]);
- std::swap(bitmaps[LEFT_DATA], bitmaps[RIGHT_DATA]);
- }
- // override bitmaps[RIGHT_VALID] to make it safe for Visit()
+ if (right.null_count == 0) {
+ // bitmaps[RIGHT_VALID] might be null; override to make it safe for Visit()
bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA];
-
Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0),
words[RIGHT_DATA]);
});
- } else {
+ return;
+ }
+
+ if (left.null_count == 0) {
+ // bitmaps[LEFT_VALID] might be null; override to make it safe for Visit()
+ bitmaps[LEFT_VALID] = bitmaps[LEFT_DATA];
Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
- apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID],
words[RIGHT_DATA]);
+ apply(~uint64_t(0), words[LEFT_DATA], words[RIGHT_VALID],
words[RIGHT_DATA]);
});
+ return;
}
+
+ DCHECK(left.null_count != 0 || right.null_count != 0);
Review comment:
You mean `&&`?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]