Repository: kudu
Updated Branches:
  refs/heads/master f441b45bf -> ca3b162e1


bshuf_block: some low-hanging-fruit optimizations on write path

Rather than adding an element at a time and calling the virtual
'IsBlockFull()' function, we predetermine how many elements we will
accept. This allows a much simpler batched 'Add()' implementation.

This sped up the write side of cfile-test's 100M-integer test about 2x.

Change-Id: Ia895f7731e5371967782ef9cb176a9d493894a83
Reviewed-on: http://gerrit.cloudera.org:8080/5195
Reviewed-by: Dan Burkert <[email protected]>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/ca3b162e
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/ca3b162e
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/ca3b162e

Branch: refs/heads/master
Commit: ca3b162e1beaba2309e3ed750ccc73489ad0d9d1
Parents: f441b45
Author: Todd Lipcon <[email protected]>
Authored: Tue Nov 22 17:03:24 2016 -0800
Committer: Adar Dembo <[email protected]>
Committed: Wed Nov 30 02:28:49 2016 +0000

----------------------------------------------------------------------
 src/kudu/cfile/bshuf_block.h | 36 ++++++++++--------------------------
 1 file changed, 10 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/ca3b162e/src/kudu/cfile/bshuf_block.h
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/bshuf_block.h b/src/kudu/cfile/bshuf_block.h
index a9ec466..794da45 100644
--- a/src/kudu/cfile/bshuf_block.h
+++ b/src/kudu/cfile/bshuf_block.h
@@ -91,31 +91,27 @@ class BShufBlockBuilder : public BlockBuilder {
   }
 
   void Reset() OVERRIDE {
+    auto block_size = options_->storage_attributes.cfile_block_size;
     count_ = 0;
     data_.clear();
-    data_.reserve(options_->storage_attributes.cfile_block_size);
+    data_.reserve(block_size);
     buffer_.clear();
     buffer_.resize(kHeaderSize);
     finished_ = false;
+    rem_elem_capacity_ = block_size / size_of_type;
   }
 
   bool IsBlockFull() const override {
-    return EstimateEncodedSize() > options_->storage_attributes.cfile_block_size;
+    return rem_elem_capacity_ == 0;
   }
 
   int Add(const uint8_t* vals_void, size_t count) OVERRIDE {
     DCHECK(!finished_);
-    const CppType* vals = reinterpret_cast<const CppType* >(vals_void);
-    int added = 0;
-    // If the current block is full, stop adding more items.
-    while (!IsBlockFull() && added < count) {
-      const uint8_t* ptr = reinterpret_cast<const uint8_t*>(vals);
-      data_.append(ptr, size_of_type);
-      vals++;
-      added++;
-      count_++;
-    }
-    return added;
+    int to_add = std::min<int>(rem_elem_capacity_, count);
+    data_.append(vals_void, to_add * size_of_type);
+    count_ += to_add;
+    rem_elem_capacity_ -= to_add;
+    return to_add;
   }
 
   size_t Count() const OVERRIDE {
@@ -166,19 +162,6 @@ class BShufBlockBuilder : public BlockBuilder {
     memcpy(&last_key_, cell_ptr(count_ - 1), size_of_type);
   }
 
-  size_t EstimateEncodedSize() const {
-    int num = KUDU_ALIGN_UP(count_, 8);
-    // The result of bshuf_compress_lz4_bound(num, size_of_type, 0)
-    // is always bigger than the original size (num * size_of_type).
-    // However, the compression ratio in most cases is larger than 1,
-    // Therefore, using the original size may be more accurate and
-    // cause less overhead.
-    //
-    // TODO(todd): we could make this estimate more accurate by keeping
-    // track of the maximum bit-width of the inserted elements.
-    return kHeaderSize + num * size_of_type;
-  }
-
   Slice Finish(rowid_t ordinal_pos, int final_size_of_type) {
     data_.resize(kHeaderSize + final_size_of_type * count_);
 
@@ -221,6 +204,7 @@ class BShufBlockBuilder : public BlockBuilder {
   faststring data_;
   faststring buffer_;
   uint32_t count_;
+  int rem_elem_capacity_;
   bool finished_;
   CppType first_key_;
   CppType last_key_;

Reply via email to