Repository: arrow Updated Branches: refs/heads/master 4e77d3382 -> 067cd4ebf
ARROW-630: [C++] Create boolean batches for IPC testing, properly account for nonzero offset This fixes a couple of bugs; boolean IPC was not being tested directly like the other types (it was tested implicitly by the integration tests, though) Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #460 from wesm/ARROW-630 and squashes the following commits: f9448a7 [Wes McKinney] Create boolean batches for IPC testing, properly account for offset in unloading, comparison Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/067cd4eb Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/067cd4eb Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/067cd4eb Branch: refs/heads/master Commit: 067cd4ebfbd9be9b607658a2a249017cc6db84f9 Parents: 4e77d33 Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Fri Mar 31 13:00:11 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Fri Mar 31 13:00:11 2017 -0400 ---------------------------------------------------------------------- cpp/src/arrow/compare.cc | 4 +++- cpp/src/arrow/ipc/ipc-read-write-test.cc | 2 +- cpp/src/arrow/ipc/test-common.h | 22 ++++++++++++++++++++++ cpp/src/arrow/ipc/writer.cc | 7 ++++++- 4 files changed, 32 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/067cd4eb/cpp/src/arrow/compare.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index c2580b4..4cd617e 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -294,13 +294,15 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor { Status Visit(const BooleanArray& left) { const auto& right = static_cast<const BooleanArray&>(right_); + if (left.null_count() > 0) { const uint8_t* left_data = left.data()->data(); const uint8_t* right_data = right.data()->data(); for 
(int64_t i = 0; i < left.length(); ++i) { if (!left.IsNull(i) && - BitUtil::GetBit(left_data, i) != BitUtil::GetBit(right_data, i)) { + BitUtil::GetBit(left_data, i + left.offset()) != + BitUtil::GetBit(right_data, i + right.offset())) { result_ = false; return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/067cd4eb/cpp/src/arrow/ipc/ipc-read-write-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/ipc-read-write-test.cc b/cpp/src/arrow/ipc/ipc-read-write-test.cc index 74ca017..c900d0b 100644 --- a/cpp/src/arrow/ipc/ipc-read-write-test.cc +++ b/cpp/src/arrow/ipc/ipc-read-write-test.cc @@ -104,7 +104,7 @@ TEST_F(TestSchemaMetadata, NestedFields) { ::testing::Values(&MakeIntRecordBatch, &MakeListRecordBatch, &MakeNonNullRecordBatch, \ &MakeZeroLengthRecordBatch, &MakeDeeplyNestedList, &MakeStringTypesRecordBatch, \ &MakeStruct, &MakeUnion, &MakeDictionary, &MakeDates, &MakeTimestamps, &MakeTimes, \ - &MakeFWBinary); + &MakeFWBinary, &MakeBooleanBatch); class IpcTestFixture : public io::MemoryMapFixture { public: http://git-wip-us.apache.org/repos/asf/arrow/blob/067cd4eb/cpp/src/arrow/ipc/test-common.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h index 583f909..134a5ca 100644 --- a/cpp/src/arrow/ipc/test-common.h +++ b/cpp/src/arrow/ipc/test-common.h @@ -138,6 +138,28 @@ Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_li typedef Status MakeRecordBatch(std::shared_ptr<RecordBatch>* out); +Status MakeBooleanBatch(std::shared_ptr<RecordBatch>* out) { + const int length = 1000; + + // Make the schema + auto f0 = field("f0", boolean()); + auto f1 = field("f1", boolean()); + std::shared_ptr<Schema> schema(new Schema({f0, f1})); + + std::vector<uint8_t> values(length); + std::vector<uint8_t> valid_bytes(length); + test::random_null_bytes(length, 0.5, values.data()); 
+ test::random_null_bytes(length, 0.1, valid_bytes.data()); + + auto data = test::bytes_to_null_buffer(values); + auto null_bitmap = test::bytes_to_null_buffer(valid_bytes); + + auto a0 = std::make_shared<BooleanArray>(length, data, null_bitmap, -1); + auto a1 = std::make_shared<BooleanArray>(length, data, nullptr, 0); + out->reset(new RecordBatch(schema, length, {a0, a1})); + return Status::OK(); +} + Status MakeIntRecordBatch(std::shared_ptr<RecordBatch>* out) { const int length = 10; http://git-wip-us.apache.org/repos/asf/arrow/blob/067cd4eb/cpp/src/arrow/ipc/writer.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index 249ef20..0867382 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -281,7 +281,12 @@ class RecordBatchWriter : public ArrayVisitor { } Status Visit(const BooleanArray& array) override { - buffers_.push_back(array.data()); + std::shared_ptr<Buffer> bits = array.data(); + if (array.offset() != 0) { + RETURN_NOT_OK( + CopyBitmap(pool_, bits->data(), array.offset(), array.length(), &bits)); + } + buffers_.push_back(bits); return Status::OK(); }