Repository: kudu
Updated Branches:
  refs/heads/master f441b45bf -> ca3b162e1
bshuf_block: some low-hanging-fruit optimizations on write path

Rather than adding an element at a time and calling the virtual
'IsBlockFull()' function, we predetermine how many elements we will
accept. This allows a much simpler batched 'Add()' implementation.

This sped up the write side of cfile-test's 100M-integer test about 2x.

Change-Id: Ia895f7731e5371967782ef9cb176a9d493894a83
Reviewed-on: http://gerrit.cloudera.org:8080/5195
Reviewed-by: Dan Burkert <[email protected]>
Tested-by: Kudu Jenkins

Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/ca3b162e
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/ca3b162e
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/ca3b162e

Branch: refs/heads/master
Commit: ca3b162e1beaba2309e3ed750ccc73489ad0d9d1
Parents: f441b45
Author: Todd Lipcon <[email protected]>
Authored: Tue Nov 22 17:03:24 2016 -0800
Committer: Adar Dembo <[email protected]>
Committed: Wed Nov 30 02:28:49 2016 +0000

----------------------------------------------------------------------
 src/kudu/cfile/bshuf_block.h | 36 ++++++++++--------------------------
 1 file changed, 10 insertions(+), 26 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/kudu/blob/ca3b162e/src/kudu/cfile/bshuf_block.h
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/bshuf_block.h b/src/kudu/cfile/bshuf_block.h
index a9ec466..794da45 100644
--- a/src/kudu/cfile/bshuf_block.h
+++ b/src/kudu/cfile/bshuf_block.h
@@ -91,31 +91,27 @@ class BShufBlockBuilder : public BlockBuilder {
   }
 
   void Reset() OVERRIDE {
+    auto block_size = options_->storage_attributes.cfile_block_size;
     count_ = 0;
     data_.clear();
-    data_.reserve(options_->storage_attributes.cfile_block_size);
+    data_.reserve(block_size);
     buffer_.clear();
     buffer_.resize(kHeaderSize);
     finished_ = false;
+    rem_elem_capacity_ = block_size / size_of_type;
   }
 
   bool IsBlockFull() const override {
-    return EstimateEncodedSize() > options_->storage_attributes.cfile_block_size;
+    return rem_elem_capacity_ == 0;
   }
 
   int Add(const uint8_t* vals_void, size_t count) OVERRIDE {
     DCHECK(!finished_);
-    const CppType* vals = reinterpret_cast<const CppType* >(vals_void);
-    int added = 0;
-    // If the current block is full, stop adding more items.
-    while (!IsBlockFull() && added < count) {
-      const uint8_t* ptr = reinterpret_cast<const uint8_t*>(vals);
-      data_.append(ptr, size_of_type);
-      vals++;
-      added++;
-      count_++;
-    }
-    return added;
+    int to_add = std::min<int>(rem_elem_capacity_, count);
+    data_.append(vals_void, to_add * size_of_type);
+    count_ += to_add;
+    rem_elem_capacity_ -= to_add;
+    return to_add;
   }
 
   size_t Count() const OVERRIDE {
@@ -166,19 +162,6 @@ class BShufBlockBuilder : public BlockBuilder {
     memcpy(&last_key_, cell_ptr(count_ - 1), size_of_type);
   }
 
-  size_t EstimateEncodedSize() const {
-    int num = KUDU_ALIGN_UP(count_, 8);
-    // The result of bshuf_compress_lz4_bound(num, size_of_type, 0)
-    // is always bigger than the original size (num * size_of_type).
-    // However, the compression ratio in most cases is larger than 1,
-    // Therefore, using the original size may be more accurate and
-    // cause less overhead.
-    //
-    // TODO(todd): we could make this estimate more accurate by keeping
-    // track of the maximum bit-width of the inserted elements.
-    return kHeaderSize + num * size_of_type;
-  }
-
   Slice Finish(rowid_t ordinal_pos, int final_size_of_type) {
     data_.resize(kHeaderSize + final_size_of_type * count_);
 
@@ -221,6 +204,7 @@ class BShufBlockBuilder : public BlockBuilder {
   faststring data_;
   faststring buffer_;
   uint32_t count_;
+  int rem_elem_capacity_;
   bool finished_;
   CppType first_key_;
   CppType last_key_;
