Repository: kudu
Updated Branches:
  refs/heads/master 643c92212 -> 1f40913de


KUDU-1651: Check empty dict before decoder eval

A completely null dictionary-encoded column will yield an empty
dictionary. When decoder-level evaluation is on, this will result in
an error when creating the bitmap of matching codewords, as bitmaps
must have a non-zero size.

In this scenario, it is sufficient to not determine the set of
matching codewords. The decoder should be completely skipped over
given a null column, so the matching codeword set would not be used
here. If the decoder is not skipped over and an attempt is made to
read from the dictionary block, an error should be thrown.

Change-Id: I76d457c65238b3b2283d826076fdc7525d032e3f
Reviewed-on: http://gerrit.cloudera.org:8080/4537
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/1f40913d
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/1f40913d
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/1f40913d

Branch: refs/heads/master
Commit: 1f40913de6d3427847f3d435e1b84fb928a6f6a9
Parents: 643c922
Author: Andrew Wong <[email protected]>
Authored: Sun Sep 25 21:10:17 2016 -0700
Committer: Dan Burkert <[email protected]>
Committed: Mon Sep 26 20:59:39 2016 +0000

----------------------------------------------------------------------
 src/kudu/cfile/binary_dict_block.cc         |  1 +
 src/kudu/cfile/cfile_reader.cc              | 15 ++++++++-------
 src/kudu/tablet/tablet-decoder-eval-test.cc | 13 ++++++++++---
 3 files changed, 19 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/1f40913d/src/kudu/cfile/binary_dict_block.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/binary_dict_block.cc 
b/src/kudu/cfile/binary_dict_block.cc
index 478b1cc..30d571a 100644
--- a/src/kudu/cfile/binary_dict_block.cc
+++ b/src/kudu/cfile/binary_dict_block.cc
@@ -264,6 +264,7 @@ Status BinaryDictBlockDecoder::CopyNextAndEval(size_t* n,
 
   // Predicates that have no matching words should return no data.
   SelectionVector* codewords_matching_pred = 
parent_cfile_iter_->GetCodeWordsMatchingPredicate();
+  CHECK(codewords_matching_pred != nullptr);
   if (!codewords_matching_pred->AnySelected()) {
     // If nothing is selected, move the data_decoder_ pointer forward and clear
     // the corresponding bits in the selection vector.

http://git-wip-us.apache.org/repos/asf/kudu/blob/1f40913d/src/kudu/cfile/cfile_reader.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc
index 67999f5..8b2d42c 100644
--- a/src/kudu/cfile/cfile_reader.cc
+++ b/src/kudu/cfile/cfile_reader.cc
@@ -906,12 +906,14 @@ Status CFileIterator::Scan(ColumnMaterializationContext* 
ctx) {
   // yet been determined for this CFile.
   if (dict_decoder_ && ctx->DecoderEvalNotDisabled() && 
!codewords_matching_pred_) {
     size_t nwords = dict_decoder_->Count();
-    codewords_matching_pred_.reset(new SelectionVector(nwords));
-    codewords_matching_pred_->SetAllFalse();
-    for (size_t i = 0; i < nwords; i++) {
-      Slice cur_string = dict_decoder_->string_at_index(i);
-      if (ctx->pred()->EvaluateCell<BINARY>(static_cast<const void 
*>(&cur_string))) {
-        BitmapSet(codewords_matching_pred_->mutable_bitmap(), i);
+    if (nwords > 0) {
+      codewords_matching_pred_.reset(new SelectionVector(nwords));
+      codewords_matching_pred_->SetAllFalse();
+      for (size_t i = 0; i < nwords; i++) {
+        Slice cur_string = dict_decoder_->string_at_index(i);
+        if (ctx->pred()->EvaluateCell<BINARY>(static_cast<const void 
*>(&cur_string))) {
+          BitmapSet(codewords_matching_pred_->mutable_bitmap(), i);
+        }
       }
     }
   }
@@ -940,7 +942,6 @@ Status CFileIterator::Scan(ColumnMaterializationContext* 
ctx) {
         }
         size_t this_batch = nblock;
         if (not_null) {
-          // TODO: Maybe copy all and shift later?
           if (ctx->DecoderEvalNotDisabled()) {
             RETURN_NOT_OK(pb->dblk_->CopyNextAndEval(&this_batch,
                                                      ctx,

http://git-wip-us.apache.org/repos/asf/kudu/blob/1f40913d/src/kudu/tablet/tablet-decoder-eval-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet-decoder-eval-test.cc 
b/src/kudu/tablet/tablet-decoder-eval-test.cc
index ed43024..991d3c4 100644
--- a/src/kudu/tablet/tablet-decoder-eval-test.cc
+++ b/src/kudu/tablet/tablet-decoder-eval-test.cc
@@ -36,9 +36,9 @@ using strings::Substitute;
 
 enum Setup {
 #ifdef ADDRESS_SANITIZER
-  SMALL = 100, MEDIUM = 3000, LARGE = 10000
+  EMPTY = 0, SMALL = 100, MEDIUM = 3000, LARGE = 10000
 #else
-  SMALL = 100, MEDIUM = 5000, LARGE = 100000
+  EMPTY = 0, SMALL = 100, MEDIUM = 5000, LARGE = 100000
 #endif
 };
 
@@ -287,6 +287,12 @@ TEST_P(TabletDecoderEvalTest, NullableHighCardinality) {
   TestNullableScanAndFilter(50000, 30, 200, 75);
 }
 
+TEST_P(TabletDecoderEvalTest, CompletelyNullColumn) {
+  // Fill a tablet with pattern [0, 50) but with all values being NULL.
+  // Query for values [30, 50).
+  TestNullableScanAndFilter(50, 30, 50, 50);
+}
+
 TEST_P(TabletDecoderEvalTest, MultipleColumns) {
   // Fill a tablet with pattern [0, 10) and query a:[0, 5) AND b:[3, 10).
   // To be considered correct, returned columns must align as they do in the
@@ -294,7 +300,8 @@ TEST_P(TabletDecoderEvalTest, MultipleColumns) {
   TestMultipleColumnPredicates(10, 3, 5);
 }
 
-INSTANTIATE_TEST_CASE_P(DecoderEvaluation, TabletDecoderEvalTest, 
::testing::Values(SMALL,
+INSTANTIATE_TEST_CASE_P(DecoderEvaluation, TabletDecoderEvalTest, 
::testing::Values(EMPTY,
+                                                                               
     SMALL,
                                                                                
     MEDIUM,
                                                                                
     LARGE));
 

Reply via email to