Repository: kudu
Updated Branches:
  refs/heads/branch-1.2.x 0ba5c0f33 -> f2b1af0ed


KUDU-2085. Fix crash when seeking past end of prefix-encoded blocks

This fixes a bug when seeking past the last element of a prefix-encoded
binary block. In very specific circumstances, described in the JIRA,
this would cause a Status::Corruption() to be returned.

Prior to the fix, the updated test would fail pretty reliably when
looped 50-100 times. With the patch, I looped it 1000 times with no
failure.

Change-Id: I1670b2244d586e4920f0603c48f65ed993a3b12b
Reviewed-on: http://gerrit.cloudera.org:8080/7545
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <davidral...@gmail.com>
Reviewed-by: Andrew Wong <aw...@cloudera.com>
(cherry picked from commit d1f53cc323d5d07e79304765fe171f535c1d548a)
Reviewed-on: http://gerrit.cloudera.org:8080/7613


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f2b1af0e
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f2b1af0e
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f2b1af0e

Branch: refs/heads/branch-1.2.x
Commit: f2b1af0edf59ecd30e87dfb1eb577f7c13773b78
Parents: 0ba5c0f
Author: Todd Lipcon <t...@apache.org>
Authored: Mon Jul 31 18:09:01 2017 -0700
Committer: Todd Lipcon <t...@apache.org>
Committed: Tue Aug 8 17:34:49 2017 +0000

----------------------------------------------------------------------
 src/kudu/cfile/binary_prefix_block.cc |  9 +++++-
 src/kudu/cfile/encoding-test.cc       | 52 ++++++++++++++++++++++++------
 2 files changed, 51 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/f2b1af0e/src/kudu/cfile/binary_prefix_block.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/binary_prefix_block.cc 
b/src/kudu/cfile/binary_prefix_block.cc
index cc6eab7..cc2a5b8 100644
--- a/src/kudu/cfile/binary_prefix_block.cc
+++ b/src/kudu/cfile/binary_prefix_block.cc
@@ -288,6 +288,13 @@ void BinaryPrefixBlockDecoder::SeekToPositionInBlock(uint 
pos) {
 
   DCHECK_LE(pos, num_elems_);
 
+  // Seeking past the last element is valid -- it just sets us to the
+  // same state as if we have read all of the elements.
+  if (pos == num_elems_) {
+    cur_idx_ = pos;
+    return;
+  }
+
   int target_restart = pos/restart_interval_;
   SeekToRestartPoint(target_restart);
 
@@ -302,7 +309,7 @@ void BinaryPrefixBlockDecoder::SeekToPositionInBlock(uint 
pos) {
 // point. Note that the restart points in the file do not include
 // the '0' restart point, since that is simply the beginning of
 // the data and hence a waste of space. So, 'idx' may range from
-// 0 (first record) through num_restarts_ (last recorded restart point)
+// 0 (first record) through num_restarts_ (exclusive).
 const uint8_t * BinaryPrefixBlockDecoder::GetRestartPoint(uint32_t idx) const {
   DCHECK_LE(idx, num_restarts_);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/f2b1af0e/src/kudu/cfile/encoding-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/encoding-test.cc b/src/kudu/cfile/encoding-test.cc
index f667bf7..07c87d1 100644
--- a/src/kudu/cfile/encoding-test.cc
+++ b/src/kudu/cfile/encoding-test.cc
@@ -36,8 +36,11 @@
 #include "kudu/util/group_varint-inl.h"
 #include "kudu/util/hexdump.h"
 #include "kudu/util/memory/arena.h"
-#include "kudu/util/test_macros.h"
+#include "kudu/util/random.h"
+#include "kudu/util/random_util.h"
 #include "kudu/util/stopwatch.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
 
 using std::unique_ptr;
 using std::vector;
@@ -68,8 +71,7 @@ class TestEncoding : public ::testing::Test {
     ASSERT_EQ(1, n);
   }
 
-  // Insert a given number of strings into the provided
-  // BinaryPrefixBlockBuilder.
+  // Insert a given number of strings into the provided BlockBuilder.
   template<class BuilderType>
   static Slice CreateBinaryBlock(BuilderType *sbb,
                                  int num_items,
@@ -232,12 +234,39 @@ class TestEncoding : public ::testing::Test {
     }
   }
 
+  // Create a printf-style format string of the form 'XXXXXXXX%d' where the 
'X' characters
+  // are random bytes (not including '%', possibly including non-printable 
characters).
+  static string RandomFormatString(Random* r) {
+    char buf[8];
+    RandomString(buf, arraysize(buf), r);
+    string format_string(buf, arraysize(buf));
+    for (int i = 0; i < format_string.size(); i++) {
+      if (format_string[i] == '%') {
+        format_string[i] = 'x';
+      }
+    }
+    return format_string + "%d";
+  }
+
   template<class BuilderType, class DecoderType>
   void TestBinaryBlockRoundTrip() {
     gscoped_ptr<WriterOptions> opts(NewWriterOptions());
     BuilderType sbb(opts.get());
-    const uint kCount = 10;
-    Slice s = CreateBinaryBlock(&sbb, kCount, "hello %d");
+
+    // Generate a random format string which uses some binary data.
+    // Using random data helps the ability to trigger bugs like KUDU-2085.
+    Random r(SeedRandom());
+    const string& format_string = RandomFormatString(&r);
+    const auto& GenTestString = [&](int x) -> string {
+      return StringPrintf(format_string.c_str(), x);
+    };
+
+    // Use a random number of elements. This is necessary to trigger bugs
+    // like KUDU-2085 that only occur in specific cases such as when
+    // the number of elements is a multiple of the 'restart interval'
+    // in prefix-encoded blocks.
+    const uint kCount = r.Uniform(1000);
+    Slice s = CreateBinaryBlock(&sbb, kCount, format_string.c_str());
 
     LOG(INFO) << "Block: " << HexDump(s);
 
@@ -247,9 +276,9 @@ class TestEncoding : public ::testing::Test {
     // Check first/last keys
     Slice key;
     ASSERT_OK(sbb.GetFirstKey(&key));
-    ASSERT_EQ("hello 0", key);
+    ASSERT_EQ(GenTestString(0), key);
     ASSERT_OK(sbb.GetLastKey(&key));
-    ASSERT_EQ(StringPrintf("hello %d", kCount - 1), key);
+    ASSERT_EQ(GenTestString(kCount - 1), key);
 
     DecoderType sbd(s);
     ASSERT_OK(sbd.ParseHeader());
@@ -264,7 +293,7 @@ class TestEncoding : public ::testing::Test {
       ASSERT_TRUE(sbd.HasNext()) << "Failed on iter " << i;
       Slice s;
       CopyOne<STRING>(&sbd, &s);
-      string expected = StringPrintf("hello %d", i);
+      string expected = GenTestString(i);
       ASSERT_EQ(expected, s.ToString()) << "failed at iter " << i;
     }
     ASSERT_FALSE(sbd.HasNext());
@@ -275,6 +304,11 @@ class TestEncoding : public ::testing::Test {
       ASSERT_EQ(i, sbd.GetCurrentIndex());
     }
 
+    // Test the special case of seeking to the end of the block.
+    sbd.SeekToPositionInBlock(kCount);
+    ASSERT_EQ(kCount, sbd.GetCurrentIndex());
+    ASSERT_FALSE(sbd.HasNext());
+
     // Try to request a bunch of data in one go
     ScopedColumnBlock<STRING> cb(kCount + 10);
     ColumnDataView cdv(&cb);
@@ -285,7 +319,7 @@ class TestEncoding : public ::testing::Test {
     ASSERT_FALSE(sbd.HasNext());
 
     for (uint i = 0; i < kCount; i++) {
-      string expected = StringPrintf("hello %d", i);
+      string expected = GenTestString(i);
       ASSERT_EQ(expected, cb[i].ToString());
     }
   }

Reply via email to