KUDU-1651: Check empty dict before decoder eval A completely null dictionary-encoded column will yield an empty dictionary. When decoder-level evaluation is on, this will result in an error when creating the bitmap of matching codewords, as bitmaps must have a non-zero size.
In this scenario, it is sufficient to not determine the set of matching codewords. The decoder should be completely skipped over given a null column, so the matching codeword set would not be used here. If the decoder is not skipped over and an attempt is made to read from the dictionary block, an error should be thrown. Change-Id: I76d457c65238b3b2283d826076fdc7525d032e3f Reviewed-on: http://gerrit.cloudera.org:8080/4537 Tested-by: Kudu Jenkins Reviewed-by: Dan Burkert <[email protected]> (cherry picked from commit 1f40913de6d3427847f3d435e1b84fb928a6f6a9) Reviewed-on: http://gerrit.cloudera.org:8080/4606 Reviewed-by: Todd Lipcon <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/298aff77 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/298aff77 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/298aff77 Branch: refs/heads/branch-1.0.x Commit: 298aff7709b23a7fe7ab793b0dceaf7f397bd64e Parents: 2f12037 Author: Andrew Wong <[email protected]> Authored: Sun Sep 25 21:10:17 2016 -0700 Committer: Dan Burkert <[email protected]> Committed: Mon Oct 3 21:45:20 2016 +0000 ---------------------------------------------------------------------- src/kudu/cfile/binary_dict_block.cc | 1 + src/kudu/cfile/cfile_reader.cc | 15 ++++++++------- src/kudu/tablet/tablet-decoder-eval-test.cc | 13 ++++++++++--- 3 files changed, 19 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/298aff77/src/kudu/cfile/binary_dict_block.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/binary_dict_block.cc b/src/kudu/cfile/binary_dict_block.cc index 478b1cc..30d571a 100644 --- a/src/kudu/cfile/binary_dict_block.cc +++ b/src/kudu/cfile/binary_dict_block.cc @@ -264,6 +264,7 @@ Status BinaryDictBlockDecoder::CopyNextAndEval(size_t* n, // Predicates that have no matching words should return no data. SelectionVector* codewords_matching_pred = parent_cfile_iter_->GetCodeWordsMatchingPredicate(); + CHECK(codewords_matching_pred != nullptr); if (!codewords_matching_pred->AnySelected()) { // If nothing is selected, move the data_decoder_ pointer forward and clear // the corresponding bits in the selection vector. http://git-wip-us.apache.org/repos/asf/kudu/blob/298aff77/src/kudu/cfile/cfile_reader.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc index 67999f5..8b2d42c 100644 --- a/src/kudu/cfile/cfile_reader.cc +++ b/src/kudu/cfile/cfile_reader.cc @@ -906,12 +906,14 @@ Status CFileIterator::Scan(ColumnMaterializationContext* ctx) { // yet been determined for this CFile. if (dict_decoder_ && ctx->DecoderEvalNotDisabled() && !codewords_matching_pred_) { size_t nwords = dict_decoder_->Count(); - codewords_matching_pred_.reset(new SelectionVector(nwords)); - codewords_matching_pred_->SetAllFalse(); - for (size_t i = 0; i < nwords; i++) { - Slice cur_string = dict_decoder_->string_at_index(i); - if (ctx->pred()->EvaluateCell<BINARY>(static_cast<const void *>(&cur_string))) { - BitmapSet(codewords_matching_pred_->mutable_bitmap(), i); + if (nwords > 0) { + codewords_matching_pred_.reset(new SelectionVector(nwords)); + codewords_matching_pred_->SetAllFalse(); + for (size_t i = 0; i < nwords; i++) { + Slice cur_string = dict_decoder_->string_at_index(i); + if (ctx->pred()->EvaluateCell<BINARY>(static_cast<const void *>(&cur_string))) { + BitmapSet(codewords_matching_pred_->mutable_bitmap(), i); + } } } } @@ -940,7 +942,6 @@ Status CFileIterator::Scan(ColumnMaterializationContext* ctx) { } size_t this_batch = nblock; if (not_null) { - // TODO: Maybe copy all and shift later? if (ctx->DecoderEvalNotDisabled()) { RETURN_NOT_OK(pb->dblk_->CopyNextAndEval(&this_batch, ctx, http://git-wip-us.apache.org/repos/asf/kudu/blob/298aff77/src/kudu/tablet/tablet-decoder-eval-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet-decoder-eval-test.cc b/src/kudu/tablet/tablet-decoder-eval-test.cc index e35f416..6aba34b 100644 --- a/src/kudu/tablet/tablet-decoder-eval-test.cc +++ b/src/kudu/tablet/tablet-decoder-eval-test.cc @@ -31,9 +31,9 @@ using strings::Substitute; enum Setup { #ifdef ADDRESS_SANITIZER - SMALL = 100, MEDIUM = 3000, LARGE = 10000 + EMPTY = 0, SMALL = 100, MEDIUM = 3000, LARGE = 10000 #else - SMALL = 100, MEDIUM = 5000, LARGE = 100000 + EMPTY = 0, SMALL = 100, MEDIUM = 5000, LARGE = 100000 #endif }; @@ -287,6 +287,12 @@ TEST_P(TabletDecoderEvalTest, NullableHighCardinality) { TestNullableScanAndFilter(50000, 30, 200, 75); } +TEST_P(TabletDecoderEvalTest, CompletelyNullColumn) { + // Fill a tablet with pattern [0, 50) but with all values being NULL. + // Query for values [30, 50). + TestNullableScanAndFilter(50, 30, 50, 50); +} + TEST_P(TabletDecoderEvalTest, MultipleColumns) { // Fill a tablet with pattern [0, 10) and query a:[0, 5) AND b:[3, 10). // To be considered correct, returned columns must align as they do in the @@ -294,7 +300,8 @@ TEST_P(TabletDecoderEvalTest, MultipleColumns) { TestMultipleColumnPredicates(10, 3, 5); } -INSTANTIATE_TEST_CASE_P(DecoderEvaluation, TabletDecoderEvalTest, ::testing::Values(SMALL, +INSTANTIATE_TEST_CASE_P(DecoderEvaluation, TabletDecoderEvalTest, ::testing::Values(EMPTY, + SMALL, MEDIUM, LARGE));
