encodings: change IsBlockFull() to not take a limit parameter Everywhere we called this, we used the same value (the configured cfile block size). Given that, it's simpler to not take the parameter. This makes it easier for an encoder to determine whether or not it is full in some other context, since it can safely assume that it knows the block size up front.
Change-Id: I4598c1555732649eeebf43231082dd641ecfc576 Reviewed-on: http://gerrit.cloudera.org:8080/5200 Tested-by: Kudu Jenkins Reviewed-by: Adar Dembo <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/1e6d4ada Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/1e6d4ada Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/1e6d4ada Branch: refs/heads/master Commit: 1e6d4adae2c5b0ed8f414d70636c8681ec89fb97 Parents: b43c6c6 Author: Todd Lipcon <[email protected]> Authored: Wed Nov 23 08:37:27 2016 -0800 Committer: Todd Lipcon <[email protected]> Committed: Tue Nov 29 23:24:16 2016 +0000 ---------------------------------------------------------------------- src/kudu/cfile/binary_dict_block.cc | 9 ++++----- src/kudu/cfile/binary_dict_block.h | 2 +- src/kudu/cfile/binary_plain_block.cc | 8 ++++---- src/kudu/cfile/binary_plain_block.h | 2 +- src/kudu/cfile/binary_prefix_block.cc | 8 ++++---- src/kudu/cfile/binary_prefix_block.h | 2 +- src/kudu/cfile/block_encodings.h | 4 +++- src/kudu/cfile/bshuf_block.h | 6 +++--- src/kudu/cfile/cfile_writer.cc | 4 ++-- src/kudu/cfile/encoding-test.cc | 6 ++++-- src/kudu/cfile/gvint_block.cc | 10 +++++----- src/kudu/cfile/gvint_block.h | 4 ++-- src/kudu/cfile/plain_bitmap_block.h | 12 ++++++++---- src/kudu/cfile/plain_block.h | 4 ++-- src/kudu/cfile/rle_block.h | 21 +++++++++++++-------- src/kudu/cfile/type_encodings.cc | 6 +++--- 16 files changed, 60 insertions(+), 48 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/binary_dict_block.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/binary_dict_block.cc b/src/kudu/cfile/binary_dict_block.cc index 30d571a..be10158 100644 --- a/src/kudu/cfile/binary_dict_block.cc +++ b/src/kudu/cfile/binary_dict_block.cc @@ -51,7 +51,7 @@ void BinaryDictBlockBuilder::Reset() { buffer_.reserve(options_->storage_attributes.cfile_block_size); if (mode_ == kCodeWordMode && - dict_block_.IsBlockFull(options_->storage_attributes.cfile_block_size)) { + dict_block_.IsBlockFull()) { mode_ = kPlainBinaryMode; data_builder_.reset(new BinaryPlainBlockBuilder(options_)); } else { @@ -80,10 +80,9 @@ Slice BinaryDictBlockBuilder::Finish(rowid_t ordinal_pos) { // // If it is the latter case, all the subsequent data blocks will switch to // StringPlainBlock automatically. -bool BinaryDictBlockBuilder::IsBlockFull(size_t limit) const { - int block_size = options_->storage_attributes.cfile_block_size; - if (data_builder_->IsBlockFull(block_size)) return true; - if (dict_block_.IsBlockFull(block_size) && (mode_ == kCodeWordMode)) return true; +bool BinaryDictBlockBuilder::IsBlockFull() const { + if (data_builder_->IsBlockFull()) return true; + if (dict_block_.IsBlockFull() && (mode_ == kCodeWordMode)) return true; return false; } http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/binary_dict_block.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/binary_dict_block.h b/src/kudu/cfile/binary_dict_block.h index cf17204..95ff64c 100644 --- a/src/kudu/cfile/binary_dict_block.h +++ b/src/kudu/cfile/binary_dict_block.h @@ -68,7 +68,7 @@ class BinaryDictBlockBuilder : public BlockBuilder { public: explicit BinaryDictBlockBuilder(const WriterOptions* options); - bool IsBlockFull(size_t limit) const OVERRIDE; + bool IsBlockFull() const override; // Append the dictionary block for the current cfile to the end of the cfile and set the footer // accordingly. http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/binary_plain_block.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/binary_plain_block.cc b/src/kudu/cfile/binary_plain_block.cc index adef8c6..6889382 100644 --- a/src/kudu/cfile/binary_plain_block.cc +++ b/src/kudu/cfile/binary_plain_block.cc @@ -52,8 +52,8 @@ void BinaryPlainBlockBuilder::Reset() { finished_ = false; } -bool BinaryPlainBlockBuilder::IsBlockFull(size_t limit) const { - return size_estimate_ > limit; +bool BinaryPlainBlockBuilder::IsBlockFull() const { + return size_estimate_ > options_->storage_attributes.cfile_block_size; } Slice BinaryPlainBlockBuilder::Finish(rowid_t ordinal_pos) { @@ -80,10 +80,10 @@ int BinaryPlainBlockBuilder::Add(const uint8_t *vals, size_t count) { size_t i = 0; // If the block is full, should stop adding more items. - while (!IsBlockFull(options_->storage_attributes.cfile_block_size) && i < count) { + while (!IsBlockFull() && i < count) { // Every fourth entry needs a gvint selector byte - // TODO: does it cost a lot to account these things specifically? + // TODO(todd): does it cost a lot to account these things specifically? // maybe cheaper to just over-estimate - allocation is cheaper than math? if (offsets_.size() % 4 == 0) { size_estimate_++; http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/binary_plain_block.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/binary_plain_block.h b/src/kudu/cfile/binary_plain_block.h index 5b64a18..1004596 100644 --- a/src/kudu/cfile/binary_plain_block.h +++ b/src/kudu/cfile/binary_plain_block.h @@ -43,7 +43,7 @@ class BinaryPlainBlockBuilder : public BlockBuilder { public: explicit BinaryPlainBlockBuilder(const WriterOptions *options); - bool IsBlockFull(size_t limit) const OVERRIDE; + bool IsBlockFull() const override; int Add(const uint8_t *vals, size_t count) OVERRIDE; http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/binary_prefix_block.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/binary_prefix_block.cc b/src/kudu/cfile/binary_prefix_block.cc index fa59d12..0c299b0 100644 --- a/src/kudu/cfile/binary_prefix_block.cc +++ b/src/kudu/cfile/binary_prefix_block.cc @@ -79,9 +79,9 @@ void BinaryPrefixBlockBuilder::Reset() { last_val_.clear(); } -bool BinaryPrefixBlockBuilder::IsBlockFull(size_t limit) const { - // TODO: take restarts size into account - return buffer_.size() > limit; +bool BinaryPrefixBlockBuilder::IsBlockFull() const { + // TODO(todd): take restarts size into account + return buffer_.size() > options_->storage_attributes.cfile_block_size; } Slice BinaryPrefixBlockBuilder::Finish(rowid_t ordinal_pos) { @@ -132,7 +132,7 @@ int BinaryPrefixBlockBuilder::Add(const uint8_t *vals, size_t count) { Slice prev_val(last_val_); // We generate a static call to IsBlockFull() to avoid the vtable lookup // in this hot path. - while (!BinaryPrefixBlockBuilder::IsBlockFull(options_->storage_attributes.cfile_block_size) && + while (!BinaryPrefixBlockBuilder::IsBlockFull() && added < count) { const Slice val = slices[added]; http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/binary_prefix_block.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/binary_prefix_block.h b/src/kudu/cfile/binary_prefix_block.h index f78b01d..3094b07 100644 --- a/src/kudu/cfile/binary_prefix_block.h +++ b/src/kudu/cfile/binary_prefix_block.h @@ -37,7 +37,7 @@ class BinaryPrefixBlockBuilder : public BlockBuilder { public: explicit BinaryPrefixBlockBuilder(const WriterOptions *options); - bool IsBlockFull(size_t limit) const OVERRIDE; + bool IsBlockFull() const override; int Add(const uint8_t *vals, size_t count) OVERRIDE; http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/block_encodings.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/block_encodings.h b/src/kudu/cfile/block_encodings.h index 9abd5db..f7b7c10 100644 --- a/src/kudu/cfile/block_encodings.h +++ b/src/kudu/cfile/block_encodings.h @@ -48,8 +48,10 @@ class BlockBuilder { } // Used by the cfile writer to determine whether the current block is full. + // A block is full if it its estimated size is larger than the configured + // WriterOptions' cfile_block_size. // If it is full, the cfile writer will call FinishCurDataBlock(). - virtual bool IsBlockFull(size_t limit) const = 0; + virtual bool IsBlockFull() const = 0; // Add a sequence of values to the block. // Returns the number of values actually added, which may be less http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/bshuf_block.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/bshuf_block.h b/src/kudu/cfile/bshuf_block.h index 10caa32..4a4b551 100644 --- a/src/kudu/cfile/bshuf_block.h +++ b/src/kudu/cfile/bshuf_block.h @@ -98,15 +98,15 @@ class BShufBlockBuilder : public BlockBuilder { buffer_.resize(kHeaderSize); } - bool IsBlockFull(size_t limit) const OVERRIDE { - return EstimateEncodedSize() > limit; + bool IsBlockFull() const override { + return EstimateEncodedSize() > options_->storage_attributes.cfile_block_size; } int Add(const uint8_t* vals_void, size_t count) OVERRIDE { const CppType* vals = reinterpret_cast<const CppType* >(vals_void); int added = 0; // If the current block is full, stop adding more items. - while (!IsBlockFull(options_->storage_attributes.cfile_block_size) && added < count) { + while (!IsBlockFull() && added < count) { const uint8_t* ptr = reinterpret_cast<const uint8_t*>(vals); data_.append(ptr, size_of_type); vals++; http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/cfile_writer.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/cfile_writer.cc b/src/kudu/cfile/cfile_writer.cc index 091bf26..018b309 100644 --- a/src/kudu/cfile/cfile_writer.cc +++ b/src/kudu/cfile/cfile_writer.cc @@ -297,7 +297,7 @@ Status CFileWriter::AppendEntries(const void *entries, size_t count) { rem -= n; value_count_ += n; - if (data_block_->IsBlockFull(options_.storage_attributes.cfile_block_size)) { + if (data_block_->IsBlockFull()) { RETURN_NOT_OK(FinishCurDataBlock()); } } @@ -328,7 +328,7 @@ Status CFileWriter::AppendNullableEntries(const uint8_t *bitmap, value_count_ += n; rem -= n; - if (data_block_->IsBlockFull(options_.storage_attributes.cfile_block_size)) { + if (data_block_->IsBlockFull()) { RETURN_NOT_OK(FinishCurDataBlock()); } http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/encoding-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/encoding-test.cc b/src/kudu/cfile/encoding-test.cc index a3717f6..d8e5cc3 100644 --- a/src/kudu/cfile/encoding-test.cc +++ b/src/kudu/cfile/encoding-test.cc @@ -559,7 +559,8 @@ class TestEncoding : public ::testing::Test { i += run_size; } - BuilderType bb; + unique_ptr<WriterOptions> opts(NewWriterOptions()); + BuilderType bb(opts.get()); bb.Add(reinterpret_cast<const uint8_t *>(&to_insert[0]), to_insert.size()); Slice s = bb.Finish(kOrdinalPosBase); @@ -687,7 +688,8 @@ TEST_F(TestEncoding, TestIntBlockEncoder) { } TEST_F(TestEncoding, TestRleIntBlockEncoder) { - RleIntBlockBuilder<UINT32> ibb; + unique_ptr<WriterOptions> opts(NewWriterOptions()); + RleIntBlockBuilder<UINT32> ibb(opts.get()); gscoped_ptr<int[]> ints(new int[10000]); for (int i = 0; i < 10000; i++) { ints[i] = random(); http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/gvint_block.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/gvint_block.cc b/src/kudu/cfile/gvint_block.cc index 1a8c6cc..bf402d1 100644 --- a/src/kudu/cfile/gvint_block.cc +++ b/src/kudu/cfile/gvint_block.cc @@ -47,17 +47,17 @@ void GVIntBlockBuilder::Reset() { estimated_raw_size_ = 0; } -bool GVIntBlockBuilder::IsBlockFull(size_t limit) const { - return EstimateEncodedSize() > limit; +bool GVIntBlockBuilder::IsBlockFull() const { + return EstimateEncodedSize() > options_->storage_attributes.cfile_block_size; } -int GVIntBlockBuilder::Add(const uint8_t *vals_void, size_t count) { - const uint32_t *vals = reinterpret_cast<const uint32_t *>(vals_void); +int GVIntBlockBuilder::Add(const uint8_t *vals_buf, size_t count) { + const uint32_t *vals = reinterpret_cast<const uint32_t *>(vals_buf); int added = 0; // If the block is full, should stop adding more items. - while (!IsBlockFull(options_->storage_attributes.cfile_block_size) && added < count) { + while (!IsBlockFull() && added < count) { uint32_t val = *vals++; estimated_raw_size_ += CalcRequiredBytes32(val); ints_.push_back(val); http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/gvint_block.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/gvint_block.h b/src/kudu/cfile/gvint_block.h index 21c96f8..6c386fd 100644 --- a/src/kudu/cfile/gvint_block.h +++ b/src/kudu/cfile/gvint_block.h @@ -47,9 +47,9 @@ class GVIntBlockBuilder : public BlockBuilder { public: explicit GVIntBlockBuilder(const WriterOptions *options); - bool IsBlockFull(size_t limit) const OVERRIDE; + bool IsBlockFull() const override; - int Add(const uint8_t *vals, size_t count) OVERRIDE; + int Add(const uint8_t *vals_buf, size_t count) OVERRIDE; Slice Finish(rowid_t ordinal_pos) OVERRIDE; http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/plain_bitmap_block.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/plain_bitmap_block.h b/src/kudu/cfile/plain_bitmap_block.h index af62773..04e1f89 100644 --- a/src/kudu/cfile/plain_bitmap_block.h +++ b/src/kudu/cfile/plain_bitmap_block.h @@ -22,6 +22,7 @@ #include <string> #include "kudu/cfile/block_encodings.h" +#include "kudu/cfile/cfile_util.h" #include "kudu/common/columnblock.h" #include "kudu/gutil/port.h" #include "kudu/gutil/strings/substitute.h" @@ -42,13 +43,14 @@ struct WriterOptions; // class PlainBitMapBlockBuilder : public BlockBuilder { public: - PlainBitMapBlockBuilder() - : writer_(&buf_) { + explicit PlainBitMapBlockBuilder(const WriterOptions* options) + : writer_(&buf_), + options_(options) { Reset(); } - virtual bool IsBlockFull(size_t limit) const OVERRIDE { - return writer_.bytes_written() > limit; + virtual bool IsBlockFull() const override { + return writer_.bytes_written() > options_->storage_attributes.cfile_block_size; } virtual int Add(const uint8_t* vals, size_t count) OVERRIDE { @@ -96,6 +98,8 @@ class PlainBitMapBlockBuilder : public BlockBuilder { faststring buf_; BitWriter writer_; size_t count_; + + const WriterOptions* const options_; }; http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/plain_block.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/plain_block.h b/src/kudu/cfile/plain_block.h index 3095de2..e01f3fa 100644 --- a/src/kudu/cfile/plain_block.h +++ b/src/kudu/cfile/plain_block.h @@ -61,8 +61,8 @@ class PlainBlockBuilder : public BlockBuilder { return count; } - virtual bool IsBlockFull(size_t limit) const OVERRIDE { - return buffer_.size() > limit; + virtual bool IsBlockFull() const override { + return buffer_.size() > options_->storage_attributes.cfile_block_size; } virtual Slice Finish(rowid_t ordinal_pos) OVERRIDE { http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/rle_block.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/rle_block.h b/src/kudu/cfile/rle_block.h index 1bc69eb..b441529 100644 --- a/src/kudu/cfile/rle_block.h +++ b/src/kudu/cfile/rle_block.h @@ -23,6 +23,7 @@ #include "kudu/gutil/port.h" #include "kudu/cfile/block_encodings.h" +#include "kudu/cfile/cfile_util.h" #include "kudu/common/columnblock.h" #include "kudu/util/coding.h" #include "kudu/util/coding-inl.h" @@ -47,8 +48,9 @@ enum { // class RleBitMapBlockBuilder : public BlockBuilder { public: - RleBitMapBlockBuilder() - : encoder_(&buf_, 1) { + explicit RleBitMapBlockBuilder(const WriterOptions* options) + : encoder_(&buf_, 1), + options_(options) { Reset(); } @@ -64,8 +66,8 @@ class RleBitMapBlockBuilder : public BlockBuilder { return count; } - virtual bool IsBlockFull(size_t limit) const OVERRIDE { - return encoder_.len() > limit; + virtual bool IsBlockFull() const override { + return encoder_.len() > options_->storage_attributes.cfile_block_size; } virtual Slice Finish(rowid_t ordinal_pos) OVERRIDE { @@ -98,6 +100,7 @@ class RleBitMapBlockBuilder : public BlockBuilder { private: faststring buf_; RleEncoder<bool> encoder_; + const WriterOptions* const options_; size_t count_; }; @@ -212,13 +215,14 @@ class RleBitMapBlockDecoder : public BlockDecoder { template <DataType IntType> class RleIntBlockBuilder : public BlockBuilder { public: - explicit RleIntBlockBuilder(const WriterOptions* opts = NULL) - : rle_encoder_(&buf_, kCppTypeSize * 8) { + explicit RleIntBlockBuilder(const WriterOptions* options) + : rle_encoder_(&buf_, kCppTypeSize * 8), + options_(options) { Reset(); } - virtual bool IsBlockFull(size_t limit) const OVERRIDE { - return rle_encoder_.len() > limit; + virtual bool IsBlockFull() const OVERRIDE { + return rle_encoder_.len() > options_->storage_attributes.cfile_block_size; } virtual int Add(const uint8_t* vals_void, size_t count) OVERRIDE { @@ -281,6 +285,7 @@ class RleIntBlockBuilder : public BlockBuilder { faststring buf_; size_t count_; RleEncoder<CppType> rle_encoder_; + const WriterOptions* const options_; }; // http://git-wip-us.apache.org/repos/asf/kudu/blob/1e6d4ada/src/kudu/cfile/type_encodings.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/type_encodings.cc b/src/kudu/cfile/type_encodings.cc index dd249ac..cb4d79a 100644 --- a/src/kudu/cfile/type_encodings.cc +++ b/src/kudu/cfile/type_encodings.cc @@ -107,7 +107,7 @@ template<> struct DataTypeEncodingTraits<BOOL, PLAIN_ENCODING> { static Status CreateBlockBuilder(BlockBuilder **bb, const WriterOptions *options) { - *bb = new PlainBitMapBlockBuilder(); + *bb = new PlainBitMapBlockBuilder(options); return Status::OK(); } @@ -124,7 +124,7 @@ template<> struct DataTypeEncodingTraits<BOOL, RLE> { static Status CreateBlockBuilder(BlockBuilder** bb, const WriterOptions *options) { - *bb = new RleBitMapBlockBuilder(); + *bb = new RleBitMapBlockBuilder(options); return Status::OK(); } @@ -189,7 +189,7 @@ template<DataType IntType> struct DataTypeEncodingTraits<IntType, RLE> { static Status CreateBlockBuilder(BlockBuilder** bb, const WriterOptions *options) { - *bb = new RleIntBlockBuilder<IntType>(); + *bb = new RleIntBlockBuilder<IntType>(options); return Status::OK(); }
