Repository: arrow Updated Branches: refs/heads/master e333576a0 -> d54ab9a23
ARROW-737: [C++] Enable mutable buffer slices, SliceMutableBuffer function This patch also results in better microperformance on my laptop. ``` Run on (4 X 1200 MHz CPU s) 2017-04-01 22:35:30 Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------- BM_WriteRecordBatch/1/min_time:1.000/real_time 53617 ns 53423 ns 26903 18.2136GB/s BM_WriteRecordBatch/4/min_time:1.000/real_time 48118 ns 47999 ns 26823 20.295GB/s BM_WriteRecordBatch/16/min_time:1.000/real_time 46679 ns 46562 ns 30409 20.9209GB/s BM_WriteRecordBatch/64/min_time:1.000/real_time 50583 ns 50344 ns 27308 19.3061GB/s BM_WriteRecordBatch/256/min_time:1.000/real_time 93474 ns 93259 ns 11720 10.4474GB/s BM_WriteRecordBatch/1024/min_time:1.000/real_time 254056 ns 253461 ns 5434 3.84389GB/s BM_WriteRecordBatch/4k/min_time:1.000/real_time 892292 ns 888924 ns 1210 1120.71MB/s BM_WriteRecordBatch/8k/min_time:1.000/real_time 1799323 ns 1795023 ns 754 555.764MB/s BM_ReadRecordBatch/1/min_time:1.000/real_time 2501 ns 2452 ns 586633 390.521GB/s BM_ReadRecordBatch/4/min_time:1.000/real_time 6921 ns 5577 ns 227670 141.095GB/s BM_ReadRecordBatch/16/min_time:1.000/real_time 15561 ns 14505 ns 67493 62.758GB/s BM_ReadRecordBatch/64/min_time:1.000/real_time 48583 ns 48242 ns 30070 20.1008GB/s BM_ReadRecordBatch/256/min_time:1.000/real_time 184637 ns 183306 ns 6660 5.2891GB/s BM_ReadRecordBatch/1024/min_time:1.000/real_time 734128 ns 729692 ns 1905 1.33024GB/s BM_ReadRecordBatch/4k/min_time:1.000/real_time 3027028 ns 3008020 ns 445 330.357MB/s BM_ReadRecordBatch/8k/min_time:1.000/real_time 6472022 ns 6426801 ns 211 154.511MB/s ``` before ``` Run on (4 X 1200 MHz CPU s) 2017-04-01 22:37:36 Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------- BM_WriteRecordBatch/1/min_time:1.000/real_time 59317 ns 59202 ns 22727 16.4633GB/s BM_WriteRecordBatch/4/min_time:1.000/real_time 56322 ns 56191 ns 24944 17.3389GB/s BM_WriteRecordBatch/16/min_time:1.000/real_time 52027 ns 51880 ns 26682 18.7703GB/s BM_WriteRecordBatch/64/min_time:1.000/real_time 56324 ns 56202 ns 24724 17.3384GB/s BM_WriteRecordBatch/256/min_time:1.000/real_time 108096 ns 107868 ns 11284 9.03423GB/s BM_WriteRecordBatch/1024/min_time:1.000/real_time 279508 ns 278730 ns 4957 3.49386GB/s BM_WriteRecordBatch/4k/min_time:1.000/real_time 1013229 ns 1009772 ns 1191 986.944MB/s BM_WriteRecordBatch/8k/min_time:1.000/real_time 2011309 ns 2005377 ns 661 497.189MB/s BM_ReadRecordBatch/1/min_time:1.000/real_time 2507 ns 2501 ns 558949 389.563GB/s BM_ReadRecordBatch/4/min_time:1.000/real_time 5120 ns 5110 ns 275798 190.735GB/s BM_ReadRecordBatch/16/min_time:1.000/real_time 15800 ns 15706 ns 85481 61.8072GB/s BM_ReadRecordBatch/64/min_time:1.000/real_time 55678 ns 55476 ns 25022 17.5393GB/s BM_ReadRecordBatch/256/min_time:1.000/real_time 218083 ns 217596 ns 6163 4.47794GB/s BM_ReadRecordBatch/1024/min_time:1.000/real_time 875861 ns 873419 ns 1591 1.11497GB/s BM_ReadRecordBatch/4k/min_time:1.000/real_time 3545586 ns 3538141 ns 383 282.041MB/s BM_ReadRecordBatch/8k/min_time:1.000/real_time 7118830 ns 7104433 ns 194 140.473MB/s ``` Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #479 from wesm/ARROW-737 and squashes the following commits: b663ca4 [Wes McKinney] Enable mutable buffer slices, SliceMutableBuffer function Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/d54ab9a2 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/d54ab9a2 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/d54ab9a2 Branch: refs/heads/master Commit: d54ab9a23aa8d4fe52ce91b117511540cc7491bb Parents: e333576 Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Sun Apr 2 12:06:56 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Sun Apr 2 12:06:56 2017 -0400 ---------------------------------------------------------------------- cpp/src/arrow/buffer-test.cc | 17 +++++++++++++++++ cpp/src/arrow/buffer.cc | 17 ++++++++++++----- cpp/src/arrow/buffer.h | 25 +++++++++++++++++-------- 3 files changed, 46 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/d54ab9a2/cpp/src/arrow/buffer-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc index e0a2137..5815ed1 100644 --- a/cpp/src/arrow/buffer-test.cc +++ b/cpp/src/arrow/buffer-test.cc @@ -168,4 +168,21 @@ TEST_F(TestBuffer, SliceBuffer) { ASSERT_EQ(2, buf.use_count()); } +TEST_F(TestBuffer, SliceMutableBuffer) { + std::string data_str = "some data to slice"; + auto data = reinterpret_cast<const uint8_t*>(data_str.c_str()); + + std::shared_ptr<MutableBuffer> buffer; + ASSERT_OK(AllocateBuffer(default_memory_pool(), 50, &buffer)); + + memcpy(buffer->mutable_data(), data, data_str.size()); + + std::shared_ptr<Buffer> slice = SliceMutableBuffer(buffer, 5, 10); + ASSERT_TRUE(slice->is_mutable()); + ASSERT_EQ(10, slice->size()); + + Buffer expected(data + 5, 10); + ASSERT_TRUE(slice->Equals(expected)); +} + } // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/d54ab9a2/cpp/src/arrow/buffer.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc index 5962340..fb63798 100644 --- a/cpp/src/arrow/buffer.cc +++ b/cpp/src/arrow/buffer.cc @@ -27,11 +27,6 @@ namespace arrow { -Buffer::Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size) - : Buffer(parent->data() + offset, size) { - parent_ = parent; -} - Buffer::~Buffer() {} Status Buffer::Copy( @@ -116,6 +111,18 @@ Status PoolBuffer::Resize(int64_t new_size, bool shrink_to_fit) { return Status::OK(); } +std::shared_ptr<Buffer> SliceMutableBuffer( + const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length) { + return std::make_shared<MutableBuffer>(buffer, offset, length); +} + +MutableBuffer::MutableBuffer( + const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size) + : MutableBuffer(parent->mutable_data() + offset, size) { + DCHECK(parent->is_mutable()) << "Must pass mutable buffer"; + parent_ = parent; +} + Status AllocateBuffer( MemoryPool* pool, int64_t size, std::shared_ptr<MutableBuffer>* out) { auto buffer = std::make_shared<PoolBuffer>(pool); http://git-wip-us.apache.org/repos/asf/arrow/blob/d54ab9a2/cpp/src/arrow/buffer.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 713d57a..3f14c96 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -46,7 +46,7 @@ class Status; class ARROW_EXPORT Buffer { public: Buffer(const uint8_t* data, int64_t size) - : is_mutable_(false), data_(data), size_(size), capacity_(size) {} + : is_mutable_(false), data_(data), size_(size), capacity_(size) {} virtual ~Buffer(); /// An offset into data that is owned by another buffer, but we want to be @@ -56,7 +56,10 @@ class ARROW_EXPORT Buffer { /// This method makes no assertions about alignment or padding of the buffer but /// in general we expected buffers to be aligned and padded to 64 bytes. In the future /// we might add utility methods to help determine if a buffer satisfies this contract. - Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size); + Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size) + : Buffer(parent->data() + offset, size) { + parent_ = parent; + } bool is_mutable() const { return is_mutable_; } @@ -74,6 +77,7 @@ class ARROW_EXPORT Buffer { int64_t capacity() const { return capacity_; } const uint8_t* data() const { return data_; } + uint8_t* mutable_data() { return mutable_data_; } int64_t size() const { return size_; } @@ -82,6 +86,7 @@ class ARROW_EXPORT Buffer { protected: bool is_mutable_; const uint8_t* data_; + uint8_t* mutable_data_; int64_t size_; int64_t capacity_; @@ -99,20 +104,24 @@ static inline std::shared_ptr<Buffer> SliceBuffer( return std::make_shared<Buffer>(buffer, offset, length); } +/// Construct a mutable buffer slice. If the parent buffer is not mutable, this +/// will abort in debug builds +std::shared_ptr<Buffer> ARROW_EXPORT SliceMutableBuffer( + const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length); + /// A Buffer whose contents can be mutated. May or may not own its data. class ARROW_EXPORT MutableBuffer : public Buffer { public: - MutableBuffer(uint8_t* data, int64_t size) : Buffer(data, size) { - is_mutable_ = true; + MutableBuffer(uint8_t* data, int64_t size) + : Buffer(data, size) { mutable_data_ = data; + is_mutable_ = true; } - uint8_t* mutable_data() { return mutable_data_; } + MutableBuffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size); protected: - MutableBuffer() : Buffer(nullptr, 0), mutable_data_(nullptr) {} - - uint8_t* mutable_data_; + MutableBuffer() : Buffer(nullptr, 0) {} }; class ARROW_EXPORT ResizableBuffer : public MutableBuffer {