Repository: kudu
Updated Branches:
  refs/heads/master 1678b3dc9 -> d1f53cc32


KUDU-2085. Fix crash when seeking past end of prefix-encoded blocks

This fixes a bug when seeking past the last element of a prefix-encoded
binary block. In very specific circumstances, described in the JIRA,
this would cause a Status::Corruption() to be returned.

Prior to the fix, the updated test would fail pretty reliably when
looped 50-100 times. With the patch, I looped it 1000 times with no
failure.

Change-Id: I1670b2244d586e4920f0603c48f65ed993a3b12b
Reviewed-on: http://gerrit.cloudera.org:8080/7545
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <davidral...@gmail.com>
Reviewed-by: Andrew Wong <aw...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d1f53cc3
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d1f53cc3
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d1f53cc3

Branch: refs/heads/master
Commit: d1f53cc323d5d07e79304765fe171f535c1d548a
Parents: 1678b3d
Author: Todd Lipcon <t...@apache.org>
Authored: Mon Jul 31 18:09:01 2017 -0700
Committer: Todd Lipcon <t...@apache.org>
Committed: Mon Aug 7 23:13:21 2017 +0000

----------------------------------------------------------------------
 src/kudu/cfile/binary_prefix_block.cc |  9 +++++-
 src/kudu/cfile/encoding-test.cc       | 48 +++++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/d1f53cc3/src/kudu/cfile/binary_prefix_block.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/binary_prefix_block.cc 
b/src/kudu/cfile/binary_prefix_block.cc
index c045895..97749bc 100644
--- a/src/kudu/cfile/binary_prefix_block.cc
+++ b/src/kudu/cfile/binary_prefix_block.cc
@@ -288,6 +288,13 @@ void BinaryPrefixBlockDecoder::SeekToPositionInBlock(uint 
pos) {
 
   DCHECK_LE(pos, num_elems_);
 
+  // Seeking past the last element is valid -- it just sets us to the
+  // same state as if we have read all of the elements.
+  if (pos == num_elems_) {
+    cur_idx_ = pos;
+    return;
+  }
+
   int target_restart = pos/restart_interval_;
   SeekToRestartPoint(target_restart);
 
@@ -302,7 +309,7 @@ void BinaryPrefixBlockDecoder::SeekToPositionInBlock(uint 
pos) {
 // point. Note that the restart points in the file do not include
 // the '0' restart point, since that is simply the beginning of
 // the data and hence a waste of space. So, 'idx' may range from
-// 0 (first record) through num_restarts_ (last recorded restart point)
+// 0 (first record) through num_restarts_ (exclusive).
 const uint8_t * BinaryPrefixBlockDecoder::GetRestartPoint(uint32_t idx) const {
   DCHECK_LE(idx, num_restarts_);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/d1f53cc3/src/kudu/cfile/encoding-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/encoding-test.cc b/src/kudu/cfile/encoding-test.cc
index db59936..7c12854 100644
--- a/src/kudu/cfile/encoding-test.cc
+++ b/src/kudu/cfile/encoding-test.cc
@@ -37,6 +37,7 @@
 #include "kudu/util/hexdump.h"
 #include "kudu/util/memory/arena.h"
 #include "kudu/util/random.h"
+#include "kudu/util/random_util.h"
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
@@ -70,8 +71,7 @@ class TestEncoding : public KuduTest {
     ASSERT_EQ(1, n);
   }
 
-  // Insert a given number of strings into the provided
-  // BinaryPrefixBlockBuilder.
+  // Insert a given number of strings into the provided BlockBuilder.
   template<class BuilderType>
   static Slice CreateBinaryBlock(BuilderType *sbb,
                                  int num_items,
@@ -234,12 +234,39 @@ class TestEncoding : public KuduTest {
     }
   }
 
+  // Create a printf-style format string of the form 'XXXXXXXX%d' where the 
'X' characters
+  // are random bytes (not including '%', possibly including non-printable 
characters).
+  static string RandomFormatString(Random* r) {
+    char buf[8];
+    RandomString(buf, arraysize(buf), r);
+    string format_string(buf, arraysize(buf));
+    for (int i = 0; i < format_string.size(); i++) {
+      if (format_string[i] == '%') {
+        format_string[i] = 'x';
+      }
+    }
+    return format_string + "%d";
+  }
+
   template<class BuilderType, class DecoderType>
   void TestBinaryBlockRoundTrip() {
     gscoped_ptr<WriterOptions> opts(NewWriterOptions());
     BuilderType sbb(opts.get());
-    const uint kCount = 10;
-    Slice s = CreateBinaryBlock(&sbb, kCount, "hello %d");
+
+    // Generate a random format string which uses some binary data.
+    // Using random data helps the ability to trigger bugs like KUDU-2085.
+    Random r(SeedRandom());
+    const string& format_string = RandomFormatString(&r);
+    const auto& GenTestString = [&](int x) {
+      return StringPrintf(format_string.c_str(), x);
+    };
+
+    // Use a random number of elements. This is necessary to trigger bugs
+    // like KUDU-2085 that only occur in specific cases such as when
+    // the number of elements is a multiple of the 'restart interval'
+    // in prefix-encoded blocks.
+    const uint kCount = r.Uniform(1000);
+    Slice s = CreateBinaryBlock(&sbb, kCount, format_string.c_str());
 
     LOG(INFO) << "Block: " << HexDump(s);
 
@@ -249,9 +276,9 @@ class TestEncoding : public KuduTest {
     // Check first/last keys
     Slice key;
     ASSERT_OK(sbb.GetFirstKey(&key));
-    ASSERT_EQ("hello 0", key);
+    ASSERT_EQ(GenTestString(0), key);
     ASSERT_OK(sbb.GetLastKey(&key));
-    ASSERT_EQ(StringPrintf("hello %d", kCount - 1), key);
+    ASSERT_EQ(GenTestString(kCount - 1), key);
 
     DecoderType sbd(s);
     ASSERT_OK(sbd.ParseHeader());
@@ -266,7 +293,7 @@ class TestEncoding : public KuduTest {
       ASSERT_TRUE(sbd.HasNext()) << "Failed on iter " << i;
       Slice s;
       CopyOne<STRING>(&sbd, &s);
-      string expected = StringPrintf("hello %d", i);
+      string expected = GenTestString(i);
       ASSERT_EQ(expected, s.ToString()) << "failed at iter " << i;
     }
     ASSERT_FALSE(sbd.HasNext());
@@ -277,6 +304,11 @@ class TestEncoding : public KuduTest {
       ASSERT_EQ(i, sbd.GetCurrentIndex());
     }
 
+    // Test the special case of seeking to the end of the block.
+    sbd.SeekToPositionInBlock(kCount);
+    ASSERT_EQ(kCount, sbd.GetCurrentIndex());
+    ASSERT_FALSE(sbd.HasNext());
+
     // Try to request a bunch of data in one go
     ScopedColumnBlock<STRING> cb(kCount + 10);
     ColumnDataView cdv(&cb);
@@ -287,7 +319,7 @@ class TestEncoding : public KuduTest {
     ASSERT_FALSE(sbd.HasNext());
 
     for (uint i = 0; i < kCount; i++) {
-      string expected = StringPrintf("hello %d", i);
+      string expected = GenTestString(i);
       ASSERT_EQ(expected, cb[i].ToString());
     }
   }

Reply via email to