Repository: kudu
Updated Branches:
  refs/heads/master 835cf6c1e -> 2b5ad4a9b


Remove GROUP_VARINT encoding

This encoding was only ever implemented for the UINT32 type, and users
can no longer specify this type, since we only support signed ints.
Thus, there's no sense carrying around this baggage.

This patch removes the encoding as well as associated tests.

Change-Id: I0b0aeb5bfd8451a6e190ccb6e55e971da668b062
Reviewed-on: http://gerrit.cloudera.org:8080/5271
Reviewed-by: Dan Burkert <[email protected]>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/2b5ad4a9
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/2b5ad4a9
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/2b5ad4a9

Branch: refs/heads/master
Commit: 2b5ad4a9b7a98cd3bb618f90572e504ff192f246
Parents: 835cf6c
Author: Todd Lipcon <[email protected]>
Authored: Tue Nov 29 17:46:16 2016 -0800
Committer: Todd Lipcon <[email protected]>
Committed: Wed Nov 30 17:33:11 2016 +0000

----------------------------------------------------------------------
 src/kudu/cfile/CMakeLists.txt      |   1 -
 src/kudu/cfile/cfile-test.cc       |   8 +-
 src/kudu/cfile/cfile_reader.cc     |   3 +-
 src/kudu/cfile/cfile_writer.h      |   2 -
 src/kudu/cfile/encoding-test.cc    |  55 -----
 src/kudu/cfile/gvint_block.cc      | 378 --------------------------------
 src/kudu/cfile/gvint_block.h       | 165 --------------
 src/kudu/cfile/type_encodings.cc   |  19 --
 src/kudu/client/schema.h           |   7 +-
 src/kudu/common/common.proto       |   1 +
 src/kudu/tablet/diskrowset-test.cc |   4 +-
 11 files changed, 14 insertions(+), 629 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/cfile/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/CMakeLists.txt b/src/kudu/cfile/CMakeLists.txt
index f192c6d..dbaa2a3 100644
--- a/src/kudu/cfile/CMakeLists.txt
+++ b/src/kudu/cfile/CMakeLists.txt
@@ -41,7 +41,6 @@ add_library(cfile
   cfile_util.cc
   cfile_writer.cc
   compression_codec.cc
-  gvint_block.cc
   index_block.cc
   index_btree.cc
   type_encodings.cc)

http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/cfile/cfile-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile-test.cc b/src/kudu/cfile/cfile-test.cc
index 4514c54..1db6b76 100644
--- a/src/kudu/cfile/cfile-test.cc
+++ b/src/kudu/cfile/cfile-test.cc
@@ -443,7 +443,7 @@ TEST_P(TestCFileBothCacheTypes, 
TestWrite1MDuplicateFileStringsDictEncoding) {
 }
 
 TEST_P(TestCFileBothCacheTypes, TestReadWriteUInt32) {
-  for (auto enc : { PLAIN_ENCODING, RLE, GROUP_VARINT }) {
+  for (auto enc : { PLAIN_ENCODING, RLE }) {
     TestReadWriteFixedSizeTypes<UInt32DataGenerator<false>>(enc);
   }
 }
@@ -740,8 +740,10 @@ TEST_P(TestCFileBothCacheTypes, TestAppendRaw) {
 
 TEST_P(TestCFileBothCacheTypes, TestNullInts) {
   UInt32DataGenerator<true> generator;
-  TestNullTypes(&generator, GROUP_VARINT, NO_COMPRESSION);
-  TestNullTypes(&generator, GROUP_VARINT, LZ4);
+  TestNullTypes(&generator, PLAIN_ENCODING, NO_COMPRESSION);
+  TestNullTypes(&generator, PLAIN_ENCODING, LZ4);
+  TestNullTypes(&generator, BIT_SHUFFLE, NO_COMPRESSION);
+  TestNullTypes(&generator, BIT_SHUFFLE, LZ4);
 }
 
 TEST_P(TestCFileBothCacheTypes, TestNullFloats) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/cfile/cfile_reader.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc
index ff57931..d2ec70b 100644
--- a/src/kudu/cfile/cfile_reader.cc
+++ b/src/kudu/cfile/cfile_reader.cc
@@ -21,15 +21,14 @@
 
 #include <algorithm>
 
+#include "kudu/cfile/binary_plain_block.h"
 #include "kudu/cfile/block_cache.h"
 #include "kudu/cfile/block_handle.h"
 #include "kudu/cfile/block_pointer.h"
 #include "kudu/cfile/cfile.pb.h"
 #include "kudu/cfile/cfile_writer.h" // for kMagicString
-#include "kudu/cfile/gvint_block.h"
 #include "kudu/cfile/index_block.h"
 #include "kudu/cfile/index_btree.h"
-#include "kudu/cfile/binary_plain_block.h"
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/mathlimits.h"
 #include "kudu/gutil/strings/substitute.h"

http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/cfile/cfile_writer.h
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile_writer.h b/src/kudu/cfile/cfile_writer.h
index 74984f7..db55286 100644
--- a/src/kudu/cfile/cfile_writer.h
+++ b/src/kudu/cfile/cfile_writer.h
@@ -48,8 +48,6 @@ using std::unordered_map;
 
 class BlockPointer;
 class BTreeInfoPB;
-class GVIntBlockBuilder;
-class BinaryPrefixBlockBuilder;
 class IndexTreeBuilder;
 
 // Magic used in header/footer

http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/cfile/encoding-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/encoding-test.cc b/src/kudu/cfile/encoding-test.cc
index b8ece20..f667bf7 100644
--- a/src/kudu/cfile/encoding-test.cc
+++ b/src/kudu/cfile/encoding-test.cc
@@ -25,7 +25,6 @@
 #include "kudu/cfile/block_encodings.h"
 #include "kudu/cfile/bshuf_block.h"
 #include "kudu/cfile/cfile_writer.h"
-#include "kudu/cfile/gvint_block.h"
 #include "kudu/cfile/plain_bitmap_block.h"
 #include "kudu/cfile/plain_block.h"
 #include "kudu/cfile/rle_block.h"
@@ -681,27 +680,6 @@ TEST_F(TestEncoding, TestBShufDoubleBlockEncoder) {
                                     BShufBlockDecoder<DOUBLE> >(doubles.get(), 
kSize);
 }
 
-TEST_F(TestEncoding, TestIntBlockEncoder) {
-  gscoped_ptr<WriterOptions> opts(NewWriterOptions());
-  GVIntBlockBuilder ibb(opts.get());
-
-  auto ints = new int[10000];
-  for (int i = 0; i < 10000; i++) {
-    ints[i] = random();
-  }
-  ibb.Add(reinterpret_cast<const uint8_t *>(ints), 10000);
-  delete[] ints;
-
-  Slice s = ibb.Finish(12345);
-  LOG(INFO) << "Encoded size for 10k ints: " << s.size();
-
-  // Test empty case -- should be 5 bytes for just the
-  // header word (all zeros)
-  ibb.Reset();
-  s = ibb.Finish(0);
-  ASSERT_EQ(5UL, s.size());
-}
-
 TEST_F(TestEncoding, TestRleIntBlockEncoder) {
   unique_ptr<WriterOptions> opts(NewWriterOptions());
   RleIntBlockBuilder<UINT32> ibb(opts.get());
@@ -732,16 +710,6 @@ TEST_F(TestEncoding, TestRleBitMapRoundTrip) {
   TestBoolBlockRoundTrip<RleBitMapBlockBuilder, RleBitMapBlockDecoder>();
 }
 
-TEST_F(TestEncoding, TestGVIntBlockRoundTrip) {
-  gscoped_ptr<WriterOptions> opts(NewWriterOptions());
-  gscoped_ptr<GVIntBlockBuilder> ibb(new GVIntBlockBuilder(opts.get()));
-  TestIntBlockRoundTrip<GVIntBlockBuilder, GVIntBlockDecoder, 
UINT32>(ibb.get());
-}
-
-TEST_F(TestEncoding, TestGVIntEmptyBlockEncodeDecode) {
-  TestEmptyBlockEncodeDecode<GVIntBlockBuilder, GVIntBlockDecoder>();
-}
-
 // Test seeking to a value in a small block.
 // Regression test for a bug seen in development where this would
 // infinite loop when there are no 'restarts' in a given block.
@@ -790,29 +758,6 @@ TEST_F(TestEncoding, 
TestBinaryPrefixBlockBuilderTruncation) {
   TestBinaryBlockTruncation<BinaryPrefixBlockBuilder, 
BinaryPrefixBlockDecoder>();
 }
 
-#ifdef NDEBUG
-TEST_F(TestEncoding, GVIntSeekBenchmark) {
-  gscoped_ptr<WriterOptions> opts(NewWriterOptions());
-  gscoped_ptr<GVIntBlockBuilder> ibb(new GVIntBlockBuilder(opts.get()));
-  DoSeekTest<GVIntBlockBuilder, GVIntBlockDecoder, UINT32>(ibb.get(), 32768, 
100000, false);
-}
-#endif
-
-TEST_F(TestEncoding, GVIntSeekTest) {
-  gscoped_ptr<WriterOptions> opts(NewWriterOptions());
-  gscoped_ptr<GVIntBlockBuilder> ibb(new GVIntBlockBuilder(opts.get()));
-  DoSeekTest<GVIntBlockBuilder, GVIntBlockDecoder, UINT32>(ibb.get(), 64, 
1000, true);
-}
-
-TEST_F(TestEncoding, GVIntSeekTestTinyBlock) {
-  gscoped_ptr<WriterOptions> opts(NewWriterOptions());
-  for (int block_size = 1; block_size < 16; block_size++) {
-    gscoped_ptr<GVIntBlockBuilder> ibb(new GVIntBlockBuilder(opts.get()));
-    DoSeekTest<GVIntBlockBuilder, GVIntBlockDecoder, UINT32>(ibb.get(), 
block_size, 1000, true);
-  }
-}
-
-
 // We have several different encodings for INT blocks.
 // The following tests use GTest's TypedTest functionality to run the tests
 // for each of the encodings.

http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/cfile/gvint_block.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/gvint_block.cc b/src/kudu/cfile/gvint_block.cc
deleted file mode 100644
index bf402d1..0000000
--- a/src/kudu/cfile/gvint_block.cc
+++ /dev/null
@@ -1,378 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <algorithm>
-
-#include "kudu/cfile/cfile_writer.h"
-#include "kudu/cfile/gvint_block.h"
-#include "kudu/common/columnblock.h"
-#include "kudu/gutil/casts.h"
-#include "kudu/gutil/mathlimits.h"
-#include "kudu/util/group_varint-inl.h"
-
-namespace kudu { namespace cfile {
-
-using kudu::coding::AppendGroupVarInt32;
-using kudu::coding::CalcRequiredBytes32;
-using kudu::coding::DecodeGroupVarInt32;
-using kudu::coding::DecodeGroupVarInt32_SlowButSafe;
-using kudu::coding::DecodeGroupVarInt32_SSE_Add;
-using kudu::coding::AppendGroupVarInt32Sequence;
-
-GVIntBlockBuilder::GVIntBlockBuilder(const WriterOptions *options)
- : estimated_raw_size_(0),
-   options_(options) {
-  Reset();
-}
-
-
-void GVIntBlockBuilder::Reset() {
-  ints_.clear();
-  buffer_.clear();
-  ints_.reserve(options_->storage_attributes.cfile_block_size / 
sizeof(uint32_t));
-  estimated_raw_size_ = 0;
-}
-
-bool GVIntBlockBuilder::IsBlockFull() const {
-  return EstimateEncodedSize() > options_->storage_attributes.cfile_block_size;
-}
-
-int GVIntBlockBuilder::Add(const uint8_t *vals_buf, size_t count) {
-  const uint32_t *vals = reinterpret_cast<const uint32_t *>(vals_buf);
-
-  int added = 0;
-
-  // If the block is full, should stop adding more items.
-  while (!IsBlockFull() && added < count) {
-    uint32_t val = *vals++;
-    estimated_raw_size_ += CalcRequiredBytes32(val);
-    ints_.push_back(val);
-    added++;
-  }
-
-  return added;
-}
-
-size_t GVIntBlockBuilder::Count() const {
-  return ints_.size();
-}
-
-Status GVIntBlockBuilder::GetFirstKey(void *key) const {
-  if (ints_.empty()) {
-    return Status::NotFound("no keys in data block");
-  }
-
-  *reinterpret_cast<uint32_t *>(key) = ints_[0];
-  return Status::OK();
-}
-
-Status GVIntBlockBuilder::GetLastKey(void *key) const {
-  if (ints_.empty()) {
-    return Status::NotFound("no keys in data block");
-  }
-
-  *reinterpret_cast<uint32_t *>(key) = ints_[ints_.size() - 1];
-  return Status::OK();
-}
-
-Slice GVIntBlockBuilder::Finish(rowid_t ordinal_pos) {
-  int size_estimate = EstimateEncodedSize();
-  buffer_.reserve(size_estimate);
-  // TODO: negatives and big ints
-
-  IntType min = 0;
-  size_t size = ints_.size();
-
-  if (size > 0) {
-    min = *std::min_element(ints_.begin(), ints_.end());
-  }
-
-  CHECK_LT(ordinal_pos, MathLimits<uint32_t>::kMax) <<
-    "TODO: support large files";
-
-  buffer_.clear();
-  AppendGroupVarInt32(&buffer_,
-                      implicit_cast<uint32_t>(size),
-                      implicit_cast<uint32_t>(min),
-                      implicit_cast<uint32_t>(ordinal_pos), 0);
-
-  if (size > 0) {
-    AppendGroupVarInt32Sequence(&buffer_, min, &ints_[0], size);
-  }
-
-  // Our estimate should always be an upper bound, or else there's a bunch of
-  // extra copies due to resizes here.
-  DCHECK_GE(size_estimate, buffer_.size());
-
-  return Slice(buffer_.data(), buffer_.size());
-}
-
-////////////////////////////////////////////////////////////
-// Decoder
-////////////////////////////////////////////////////////////
-
-GVIntBlockDecoder::GVIntBlockDecoder(Slice slice)
-    : data_(std::move(slice)),
-      parsed_(false),
-      cur_pos_(nullptr),
-      cur_idx_(0) {
-}
-
-Status GVIntBlockDecoder::ParseHeader() {
-  // TODO: better range check
-  CHECK_GE(data_.size(), kMinHeaderSize);
-
-  uint32_t unused;
-  ints_start_ = DecodeGroupVarInt32_SlowButSafe(
-    (const uint8_t *)data_.data(), &num_elems_, &min_elem_,
-    &ordinal_pos_base_, &unused);
-
-  if (num_elems_ > 0 && num_elems_ * 5 / 4 > data_.size()) {
-    return Status::Corruption("bad number of elems in int block");
-  }
-
-  parsed_ = true;
-  SeekToStart();
-
-  return Status::OK();
-}
-
-class NullSink {
- public:
-  template <typename T>
-  void push_back(T t) {}
-};
-
-template<typename T>
-class PtrSink {
- public:
-  explicit PtrSink(uint8_t *ptr)
-    : ptr_(reinterpret_cast<T *>(ptr))
-  {}
-
-  void push_back(const T &t) {
-    *ptr_++ = t;
-  }
-
- private:
-  T *ptr_;
-};
-
-void GVIntBlockDecoder::SeekToPositionInBlock(uint pos) {
-  CHECK(parsed_) << "Must call ParseHeader()";
-
-  // no-op if seeking to current position
-  if (cur_idx_ == pos && cur_pos_ != nullptr) return;
-
-  // Reset to start of block
-  cur_pos_ = ints_start_;
-  cur_idx_ = 0;
-  pending_.clear();
-
-  NullSink null;
-  // TODO: should this return Status?
-  size_t n = pos;
-  CHECK_OK(DoGetNextValues(&n, &null));
-}
-
-Status GVIntBlockDecoder::SeekAtOrAfterValue(const void *value_void,
-                                             bool *exact_match) {
-
-  // for now, use a linear search.
-  // TODO: evaluate adding a few pointers at the end of the block back
-  // into every 16th group or so?
-  SeekToPositionInBlock(0);
-
-  // Stop here if the target is < the first elem of the block.
-  uint32_t target = *reinterpret_cast<const uint32_t *>(value_void);
-  if (target < min_elem_) {
-    *exact_match = false;
-    return Status::OK();
-  }
-
-  // Put target into this block's frame of reference
-  uint32_t rel_target = target - min_elem_;
-
-  const uint8_t *prev_group_pos = cur_pos_;
-
-  // Search for the group which contains the target
-  while (cur_idx_ < num_elems_) {
-    uint8_t tag = *cur_pos_++;
-    uint8_t a_sel = (tag & BOOST_BINARY(11 00 00 00)) >> 6;
-
-    // Determine length of first in this block
-    uint32_t first_elem = *reinterpret_cast<const uint32_t *>(cur_pos_)
-      & coding::MASKS[a_sel];
-    if (rel_target < first_elem) {
-      // target fell in previous group
-      DCHECK_GE(cur_idx_, 4);
-      cur_idx_ -= 4;
-      cur_pos_ = prev_group_pos;
-      break;
-    }
-
-    // Skip group;
-    uint8_t group_len = coding::VARINT_SELECTOR_LENGTHS[tag];
-    prev_group_pos = cur_pos_ - 1;
-    cur_pos_ += group_len;
-    cur_idx_ += 4;
-  }
-
-  if (cur_idx_ >= num_elems_) {
-    // target may be in the last group in the block
-    DCHECK_GE(cur_idx_, 4);
-    cur_idx_ -= 4;
-    cur_pos_ = prev_group_pos;
-  }
-
-  // We're now pointed at the correct group. Decode it
-
-  uint32_t chunk[4];
-  PtrSink<uint32_t> sink(reinterpret_cast<uint8_t *>(chunk));
-  size_t count = 4;
-  RETURN_NOT_OK(DoGetNextValues(&count, &sink));
-
-  // Reset the index back to the start of this block
-  cur_idx_ -= count;
-
-  for (int i = 0; i < count; i++) {
-    if (chunk[i] >= target) {
-      *exact_match = chunk[i] == target;
-      cur_idx_ += i;
-
-      int rem = count; // convert to signed
-
-      while (rem-- > i) {
-        pending_.push_back(chunk[rem]);
-      }
-
-      return Status::OK();
-    }
-  }
-
-  // If it wasn't in this block, then it falls between this block
-  // and the following one. So, we are positioned correctly.
-  cur_idx_ += count;
-  *exact_match = false;
-
-  if (cur_idx_ == num_elems_) {
-    // If it wasn't in the block, and this was the last block,
-    // mark as not found
-    return Status::NotFound("not in block");
-  }
-
-  return Status::OK();
-}
-
-Status GVIntBlockDecoder::CopyNextValues(size_t *n, ColumnDataView *dst) {
-  DCHECK_EQ(dst->type_info()->physical_type(), UINT32);
-  DCHECK_EQ(dst->stride(), sizeof(uint32_t));
-
-  PtrSink<uint32_t> sink(dst->data());
-  return DoGetNextValues(n, &sink);
-}
-
-Status GVIntBlockDecoder::CopyNextValuesToArray(size_t *n, uint8_t* array) {
-  PtrSink<uint32_t> sink(array);
-  return DoGetNextValues(n, &sink);
-}
-
-template<class IntSink>
-inline Status GVIntBlockDecoder::DoGetNextValues(size_t *n_param, IntSink 
*sink) {
-  size_t n = *n_param;
-  int start_idx = cur_idx_;
-  size_t rem = num_elems_ - cur_idx_;
-  assert(rem >= 0);
-
-  // Only fetch up to remaining amount
-  n = std::min(rem, n);
-
-  float min_elem_f = bit_cast<float>(min_elem_);
-  __m128i min_elem_xmm = (__m128i)_mm_set_ps(
-    min_elem_f, min_elem_f, min_elem_f, min_elem_f);
-
-  // First drain pending_
-  while (n > 0 && !pending_.empty()) {
-    sink->push_back(pending_.back());
-    pending_.pop_back();
-    n--;
-    cur_idx_++;
-  }
-
-  const uint8_t *sse_safe_pos = data_.data() + data_.size() - 17;
-  if (n == 0) goto ret;
-
-  // Now grab groups of 4 and append to vector
-  while (n >= 4) {
-    uint32_t ints[4];
-    if (cur_pos_ < sse_safe_pos) {
-      cur_pos_ = DecodeGroupVarInt32_SSE_Add(
-        cur_pos_, ints, min_elem_xmm);
-    } else {
-      cur_pos_ = DecodeGroupVarInt32_SlowButSafe(
-        cur_pos_, &ints[0], &ints[1], &ints[2], &ints[3]);
-      ints[0] += min_elem_;
-      ints[1] += min_elem_;
-      ints[2] += min_elem_;
-      ints[3] += min_elem_;
-    }
-    cur_idx_ += 4;
-
-    sink->push_back(ints[0]);
-    sink->push_back(ints[1]);
-    sink->push_back(ints[2]);
-    sink->push_back(ints[3]);
-    n -= 4;
-  }
-
-  if (n == 0) goto ret;
-
-  // Grab next batch into pending_
-  // Note that this does _not_ increment cur_idx_
-  uint32_t ints[4];
-  cur_pos_ = DecodeGroupVarInt32_SlowButSafe(
-    cur_pos_, &ints[0], &ints[1], &ints[2], &ints[3]);
-
-  DCHECK_LE(cur_pos_, &data_[0] + data_.size())
-    << "Overflowed end of buffer! cur_pos=" << cur_pos_
-    << " data=" << data_.data() << " size=" << data_.size();
-
-  ints[0] += min_elem_;
-  ints[1] += min_elem_;
-  ints[2] += min_elem_;
-  ints[3] += min_elem_;
-  // pending_ acts like a stack, so push in reverse order.
-  pending_.push_back(ints[3]);
-  pending_.push_back(ints[2]);
-  pending_.push_back(ints[1]);
-  pending_.push_back(ints[0]);
-
-  while (n > 0 && !pending_.empty()) {
-    sink->push_back(pending_.back());
-    pending_.pop_back();
-    n--;
-    cur_idx_++;
-  }
-
-  ret:
-  CHECK_EQ(n, 0);
-  *n_param = cur_idx_ - start_idx;
-  return Status::OK();
-}
-
-} // namespace cfile
-} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/cfile/gvint_block.h
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/gvint_block.h b/src/kudu/cfile/gvint_block.h
deleted file mode 100644
index d8d3042..0000000
--- a/src/kudu/cfile/gvint_block.h
+++ /dev/null
@@ -1,165 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#ifndef KUDU_CFILE_GVINT_BLOCK_H
-#define KUDU_CFILE_GVINT_BLOCK_H
-
-#include <stdint.h>
-
-#include <vector>
-
-#include <gtest/gtest_prod.h>
-
-#include "kudu/cfile/block_encodings.h"
-
-namespace kudu {
-namespace cfile {
-
-struct WriterOptions;
-typedef uint32_t IntType;
-
-using std::vector;
-
-// Builder for an encoded block of ints.
-// The encoding is group-varint plus frame-of-reference:
-//
-// Header group (gvint): <num_elements, min_element, [unused], [unused]
-// followed by enough group varints to represent the total number of
-// elements, including padding 0s at the end. Each element is a delta
-// from the min_element frame-of-reference.
-//
-// See AppendGroupVarInt32(...) for details on the varint
-// encoding.
-class GVIntBlockBuilder final : public BlockBuilder {
- public:
-  explicit GVIntBlockBuilder(const WriterOptions *options);
-
-  bool IsBlockFull() const override;
-
-  int Add(const uint8_t *vals_buf, size_t count) OVERRIDE;
-
-  Slice Finish(rowid_t ordinal_pos) OVERRIDE;
-
-  void Reset() OVERRIDE;
-
-  size_t Count() const OVERRIDE;
-
-  // Return the first added key.
-  // key should be a uint32_t *
-  Status GetFirstKey(void *key) const OVERRIDE;
-
-  // Return the last added key.
-  // key should be a uint32_t *
-  Status GetLastKey(void *key) const OVERRIDE;
-
- private:
-
-  // TODO: this currently does not do a good job of estimating
-  // when the ints are large but clustered together,
-  // since it doesn't take into account the delta coding relative
-  // to the min int. We could track the min int along the way
-  // but then we have extra branches in the add loop. Come back to this,
-  // probably the branches don't matter since this is write-side.
-  uint64_t EstimateEncodedSize() const {
-    return estimated_raw_size_ + (ints_.size() + 3) / 4
-    + kEstimatedHeaderSizeBytes + kTrailerExtraPaddingBytes;
-  }
-
-  friend class TestEncoding;
-  FRIEND_TEST(TestEncoding, TestGroupVarInt);
-  FRIEND_TEST(TestEncoding, TestIntBlockEncoder);
-
-  vector<IntType> ints_;
-  faststring buffer_;
-  uint64_t estimated_raw_size_;
-
-  const WriterOptions *options_;
-
-  enum {
-    kEstimatedHeaderSizeBytes = 10,
-
-    // Up to 3 "0s" can be tacked on the end of the block to round out
-    // the groups of 4
-    kTrailerExtraPaddingBytes = 3
-  };
-};
-
-// Decoder for UINT32 type, GROUP_VARINT coding
-class GVIntBlockDecoder final : public BlockDecoder {
- public:
-  explicit GVIntBlockDecoder(Slice slice);
-
-  Status ParseHeader() OVERRIDE;
-  void SeekToStart() {
-    SeekToPositionInBlock(0);
-  }
-
-  void SeekToPositionInBlock(uint pos) OVERRIDE;
-
-  Status SeekAtOrAfterValue(const void *value, bool *exact_match) OVERRIDE;
-
-  Status CopyNextValues(size_t *n, ColumnDataView *dst) OVERRIDE;
-
-  // Copy the integers to a temporary buffer, it is used by StringDictDecoder
-  // in its CopyNextValues() method.
-  Status CopyNextValuesToArray(size_t *n, uint8_t* array);
-
-  size_t GetCurrentIndex() const OVERRIDE {
-    DCHECK(parsed_) << "must parse header first";
-    return cur_idx_;
-  }
-
-  virtual rowid_t GetFirstRowId() const OVERRIDE {
-    return ordinal_pos_base_;
-  }
-
-  size_t Count() const OVERRIDE {
-    return num_elems_;
-  }
-
-  bool HasNext() const OVERRIDE {
-    return (num_elems_ - cur_idx_) > 0;
-  }
-
- private:
-  friend class TestEncoding;
-
-  template<class IntSink>
-  Status DoGetNextValues(size_t *n, IntSink *sink);
-
-  Slice data_;
-
-  bool parsed_;
-  const uint8_t *ints_start_;
-  uint32_t num_elems_;
-  uint32_t min_elem_;
-  rowid_t ordinal_pos_base_;
-
-  const uint8_t *cur_pos_;
-  size_t cur_idx_;
-
-  // Items that have been decoded but not yet yielded
-  // to the user. The next one to be yielded is at the
-  // *end* of the vector!
-  std::vector<uint32_t> pending_;
-
-  // Min Length of a header. (prefix + 4 tags)
-  static const size_t kMinHeaderSize = 5;
-};
-
-} // namespace cfile
-} // namespace kudu
-#endif

http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/cfile/type_encodings.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/type_encodings.cc b/src/kudu/cfile/type_encodings.cc
index cb4d79a..f789c03 100644
--- a/src/kudu/cfile/type_encodings.cc
+++ b/src/kudu/cfile/type_encodings.cc
@@ -23,7 +23,6 @@
 #include <glog/logging.h>
 
 #include "kudu/cfile/bshuf_block.h"
-#include "kudu/cfile/gvint_block.h"
 #include "kudu/cfile/plain_bitmap_block.h"
 #include "kudu/cfile/plain_block.h"
 #include "kudu/cfile/rle_block.h"
@@ -168,23 +167,6 @@ struct DataTypeEncodingTraits<BINARY, DICT_ENCODING> {
   }
 };
 
-
-// Optimized grouping variable encoding for 32bit unsigned integers
-template<>
-struct DataTypeEncodingTraits<UINT32, GROUP_VARINT> {
-
-  static Status CreateBlockBuilder(BlockBuilder **bb, const WriterOptions 
*options) {
-    *bb = new GVIntBlockBuilder(options);
-    return Status::OK();
-  }
-
-  static Status CreateBlockDecoder(BlockDecoder **bd, const Slice &slice,
-                                   CFileIterator *iter) {
-    *bd = new GVIntBlockDecoder(slice);
-    return Status::OK();
-  }
-};
-
 template<DataType IntType>
 struct DataTypeEncodingTraits<IntType, RLE> {
 
@@ -271,7 +253,6 @@ class TypeEncodingResolver {
     AddMapping<INT16, PLAIN_ENCODING>();
     AddMapping<INT16, RLE>();
     AddMapping<INT16, BIT_SHUFFLE>();
-    AddMapping<UINT32, GROUP_VARINT>();
     AddMapping<UINT32, RLE>();
     AddMapping<UINT32, PLAIN_ENCODING>();
     AddMapping<UINT32, BIT_SHUFFLE>();

http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/client/schema.h
----------------------------------------------------------------------
diff --git a/src/kudu/client/schema.h b/src/kudu/client/schema.h
index a7bb667..5bdba4d 100644
--- a/src/kudu/client/schema.h
+++ b/src/kudu/client/schema.h
@@ -58,10 +58,13 @@ class KUDU_EXPORT KuduColumnStorageAttributes {
     AUTO_ENCODING = 0,
     PLAIN_ENCODING = 1,
     PREFIX_ENCODING = 2,
-    GROUP_VARINT = 3,
     RLE = 4,
     DICT_ENCODING = 5,
-    BIT_SHUFFLE = 6
+    BIT_SHUFFLE = 6,
+
+    /// @deprecated GROUP_VARINT is not supported for valid types, and
+    /// will fall back to another encoding on the server side.
+    GROUP_VARINT = 3
   };
 
   /// @brief Column compression types.

http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/common/common.proto
----------------------------------------------------------------------
diff --git a/src/kudu/common/common.proto b/src/kudu/common/common.proto
index 6a512dc..c85e65b 100644
--- a/src/kudu/common/common.proto
+++ b/src/kudu/common/common.proto
@@ -53,6 +53,7 @@ enum EncodingType {
   AUTO_ENCODING = 0;
   PLAIN_ENCODING = 1;
   PREFIX_ENCODING = 2;
+  // GROUP_VARINT encoding is deprecated and no longer implemented.
   GROUP_VARINT = 3;
   RLE = 4;
   DICT_ENCODING = 5;

http://git-wip-us.apache.org/repos/asf/kudu/blob/2b5ad4a9/src/kudu/tablet/diskrowset-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/diskrowset-test.cc 
b/src/kudu/tablet/diskrowset-test.cc
index 9f2c0d2..2da7600 100644
--- a/src/kudu/tablet/diskrowset-test.cc
+++ b/src/kudu/tablet/diskrowset-test.cc
@@ -426,10 +426,10 @@ TEST_F(TestRowSet, TestRollingDiskRowSetWriter) {
                                  64 * 1024); // roll every 64KB
   DoWriteTestRowSet(10000, &writer);
 
-  // Should have rolled 4 times.
+  // Should have rolled 5 times.
   vector<shared_ptr<RowSetMetadata> > metas;
   writer.GetWrittenRowSetMetadata(&metas);
-  EXPECT_EQ(4, metas.size());
+  EXPECT_EQ(5, metas.size());
   for (const shared_ptr<RowSetMetadata>& meta : metas) {
     ASSERT_TRUE(meta->HasDataForColumnIdForTests(schema_.column_id(0)));
   }

Reply via email to