Repository: kudu Updated Branches: refs/heads/master 1678b3dc9 -> d1f53cc32
KUDU-2085. Fix crash when seeking past end of prefix-encoded blocks This fixes a bug when seeking past the last element of a prefix-encoded binary block. In very specific circumstances, described in the JIRA, this would cause a Status::Corruption() to be returned. Prior to the fix, the updated test would fail pretty reliably when looped 50-100 times. With the patch, I looped it 1000 times with no failure. Change-Id: I1670b2244d586e4920f0603c48f65ed993a3b12b Reviewed-on: http://gerrit.cloudera.org:8080/7545 Tested-by: Kudu Jenkins Reviewed-by: David Ribeiro Alves <davidral...@gmail.com> Reviewed-by: Andrew Wong <aw...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d1f53cc3 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d1f53cc3 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d1f53cc3 Branch: refs/heads/master Commit: d1f53cc323d5d07e79304765fe171f535c1d548a Parents: 1678b3d Author: Todd Lipcon <t...@apache.org> Authored: Mon Jul 31 18:09:01 2017 -0700 Committer: Todd Lipcon <t...@apache.org> Committed: Mon Aug 7 23:13:21 2017 +0000 ---------------------------------------------------------------------- src/kudu/cfile/binary_prefix_block.cc | 9 +++++- src/kudu/cfile/encoding-test.cc | 48 +++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/d1f53cc3/src/kudu/cfile/binary_prefix_block.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/binary_prefix_block.cc b/src/kudu/cfile/binary_prefix_block.cc index c045895..97749bc 100644 --- a/src/kudu/cfile/binary_prefix_block.cc +++ b/src/kudu/cfile/binary_prefix_block.cc @@ -288,6 +288,13 @@ void BinaryPrefixBlockDecoder::SeekToPositionInBlock(uint pos) { DCHECK_LE(pos, num_elems_); + // Seeking past the last element is valid -- it just sets us to the + // same state as if we have read all of the elements. + if (pos == num_elems_) { + cur_idx_ = pos; + return; + } + int target_restart = pos/restart_interval_; SeekToRestartPoint(target_restart); @@ -302,7 +309,7 @@ void BinaryPrefixBlockDecoder::SeekToPositionInBlock(uint pos) { // point. Note that the restart points in the file do not include // the '0' restart point, since that is simply the beginning of // the data and hence a waste of space. So, 'idx' may range from -// 0 (first record) through num_restarts_ (last recorded restart point) +// 0 (first record) through num_restarts_ (exclusive). const uint8_t * BinaryPrefixBlockDecoder::GetRestartPoint(uint32_t idx) const { DCHECK_LE(idx, num_restarts_); http://git-wip-us.apache.org/repos/asf/kudu/blob/d1f53cc3/src/kudu/cfile/encoding-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/encoding-test.cc b/src/kudu/cfile/encoding-test.cc index db59936..7c12854 100644 --- a/src/kudu/cfile/encoding-test.cc +++ b/src/kudu/cfile/encoding-test.cc @@ -37,6 +37,7 @@ #include "kudu/util/hexdump.h" #include "kudu/util/memory/arena.h" #include "kudu/util/random.h" +#include "kudu/util/random_util.h" #include "kudu/util/stopwatch.h" #include "kudu/util/test_macros.h" #include "kudu/util/test_util.h" @@ -70,8 +71,7 @@ class TestEncoding : public KuduTest { ASSERT_EQ(1, n); } - // Insert a given number of strings into the provided - // BinaryPrefixBlockBuilder. + // Insert a given number of strings into the provided BlockBuilder. template<class BuilderType> static Slice CreateBinaryBlock(BuilderType *sbb, int num_items, @@ -234,12 +234,39 @@ class TestEncoding : public KuduTest { } } + // Create a printf-style format string of the form 'XXXXXXXX%d' where the 'X' characters + // are random bytes (not including '%', possibly including non-printable characters). + static string RandomFormatString(Random* r) { + char buf[8]; + RandomString(buf, arraysize(buf), r); + string format_string(buf, arraysize(buf)); + for (int i = 0; i < format_string.size(); i++) { + if (format_string[i] == '%') { + format_string[i] = 'x'; + } + } + return format_string + "%d"; + } + template<class BuilderType, class DecoderType> void TestBinaryBlockRoundTrip() { gscoped_ptr<WriterOptions> opts(NewWriterOptions()); BuilderType sbb(opts.get()); - const uint kCount = 10; - Slice s = CreateBinaryBlock(&sbb, kCount, "hello %d"); + + // Generate a random format string which uses some binary data. + // Using random data helps the ability to trigger bugs like KUDU-2085. + Random r(SeedRandom()); + const string& format_string = RandomFormatString(&r); + const auto& GenTestString = [&](int x) { + return StringPrintf(format_string.c_str(), x); + }; + + // Use a random number of elements. This is necessary to trigger bugs + // like KUDU-2085 that only occur in specific cases such as when + // the number of elements is a multiple of the 'restart interval' + // in prefix-encoded blocks. + const uint kCount = r.Uniform(1000); + Slice s = CreateBinaryBlock(&sbb, kCount, format_string.c_str()); LOG(INFO) << "Block: " << HexDump(s); @@ -249,9 +276,9 @@ class TestEncoding : public KuduTest { // Check first/last keys Slice key; ASSERT_OK(sbb.GetFirstKey(&key)); - ASSERT_EQ("hello 0", key); + ASSERT_EQ(GenTestString(0), key); ASSERT_OK(sbb.GetLastKey(&key)); - ASSERT_EQ(StringPrintf("hello %d", kCount - 1), key); + ASSERT_EQ(GenTestString(kCount - 1), key); DecoderType sbd(s); ASSERT_OK(sbd.ParseHeader()); @@ -266,7 +293,7 @@ class TestEncoding : public KuduTest { ASSERT_TRUE(sbd.HasNext()) << "Failed on iter " << i; Slice s; CopyOne<STRING>(&sbd, &s); - string expected = StringPrintf("hello %d", i); + string expected = GenTestString(i); ASSERT_EQ(expected, s.ToString()) << "failed at iter " << i; } ASSERT_FALSE(sbd.HasNext()); @@ -277,6 +304,11 @@ class TestEncoding : public KuduTest { ASSERT_EQ(i, sbd.GetCurrentIndex()); } + // Test the special case of seeking to the end of the block. + sbd.SeekToPositionInBlock(kCount); + ASSERT_EQ(kCount, sbd.GetCurrentIndex()); + ASSERT_FALSE(sbd.HasNext()); + // Try to request a bunch of data in one go ScopedColumnBlock<STRING> cb(kCount + 10); ColumnDataView cdv(&cb); @@ -287,7 +319,7 @@ class TestEncoding : public KuduTest { ASSERT_FALSE(sbd.HasNext()); for (uint i = 0; i < kCount; i++) { - string expected = StringPrintf("hello %d", i); + string expected = GenTestString(i); ASSERT_EQ(expected, cb[i].ToString()); } }