Repository: kudu Updated Branches: refs/heads/master 643c92212 -> 1f40913de
KUDU-1651: Check empty dict before decoder eval A completely null dictionary-encoded column will yield an empty dictionary. When decoder-level evaluation is on, this will result in an error when creating the bitmap of matching codewords, as bitmaps must have a non-zero size. In this scenario, it is sufficient to not determine the set of matching codewords. The decoder should be completely skipped over given a null column, so the matching codeword set would not be used here. If the decoder is not skipped over and an attempt is made to read from the dictionary block, an error should be thrown. Change-Id: I76d457c65238b3b2283d826076fdc7525d032e3f Reviewed-on: http://gerrit.cloudera.org:8080/4537 Tested-by: Kudu Jenkins Reviewed-by: Dan Burkert <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/1f40913d Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/1f40913d Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/1f40913d Branch: refs/heads/master Commit: 1f40913de6d3427847f3d435e1b84fb928a6f6a9 Parents: 643c922 Author: Andrew Wong <[email protected]> Authored: Sun Sep 25 21:10:17 2016 -0700 Committer: Dan Burkert <[email protected]> Committed: Mon Sep 26 20:59:39 2016 +0000 ---------------------------------------------------------------------- src/kudu/cfile/binary_dict_block.cc | 1 + src/kudu/cfile/cfile_reader.cc | 15 ++++++++------- src/kudu/tablet/tablet-decoder-eval-test.cc | 13 ++++++++++--- 3 files changed, 19 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/1f40913d/src/kudu/cfile/binary_dict_block.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/binary_dict_block.cc b/src/kudu/cfile/binary_dict_block.cc index 478b1cc..30d571a 100644 --- a/src/kudu/cfile/binary_dict_block.cc +++ b/src/kudu/cfile/binary_dict_block.cc @@ -264,6 +264,7 @@ Status BinaryDictBlockDecoder::CopyNextAndEval(size_t* n, // Predicates that have no matching words should return no data. SelectionVector* codewords_matching_pred = parent_cfile_iter_->GetCodeWordsMatchingPredicate(); + CHECK(codewords_matching_pred != nullptr); if (!codewords_matching_pred->AnySelected()) { // If nothing is selected, move the data_decoder_ pointer forward and clear // the corresponding bits in the selection vector. http://git-wip-us.apache.org/repos/asf/kudu/blob/1f40913d/src/kudu/cfile/cfile_reader.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc index 67999f5..8b2d42c 100644 --- a/src/kudu/cfile/cfile_reader.cc +++ b/src/kudu/cfile/cfile_reader.cc @@ -906,12 +906,14 @@ Status CFileIterator::Scan(ColumnMaterializationContext* ctx) { // yet been determined for this CFile. if (dict_decoder_ && ctx->DecoderEvalNotDisabled() && !codewords_matching_pred_) { size_t nwords = dict_decoder_->Count(); - codewords_matching_pred_.reset(new SelectionVector(nwords)); - codewords_matching_pred_->SetAllFalse(); - for (size_t i = 0; i < nwords; i++) { - Slice cur_string = dict_decoder_->string_at_index(i); - if (ctx->pred()->EvaluateCell<BINARY>(static_cast<const void *>(&cur_string))) { - BitmapSet(codewords_matching_pred_->mutable_bitmap(), i); + if (nwords > 0) { + codewords_matching_pred_.reset(new SelectionVector(nwords)); + codewords_matching_pred_->SetAllFalse(); + for (size_t i = 0; i < nwords; i++) { + Slice cur_string = dict_decoder_->string_at_index(i); + if (ctx->pred()->EvaluateCell<BINARY>(static_cast<const void *>(&cur_string))) { + BitmapSet(codewords_matching_pred_->mutable_bitmap(), i); + } } } } @@ -940,7 +942,6 @@ Status CFileIterator::Scan(ColumnMaterializationContext* ctx) { } size_t this_batch = nblock; if (not_null) { - // TODO: Maybe copy all and shift later? if (ctx->DecoderEvalNotDisabled()) { RETURN_NOT_OK(pb->dblk_->CopyNextAndEval(&this_batch, ctx, http://git-wip-us.apache.org/repos/asf/kudu/blob/1f40913d/src/kudu/tablet/tablet-decoder-eval-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet-decoder-eval-test.cc b/src/kudu/tablet/tablet-decoder-eval-test.cc index ed43024..991d3c4 100644 --- a/src/kudu/tablet/tablet-decoder-eval-test.cc +++ b/src/kudu/tablet/tablet-decoder-eval-test.cc @@ -36,9 +36,9 @@ using strings::Substitute; enum Setup { #ifdef ADDRESS_SANITIZER - SMALL = 100, MEDIUM = 3000, LARGE = 10000 + EMPTY = 0, SMALL = 100, MEDIUM = 3000, LARGE = 10000 #else - SMALL = 100, MEDIUM = 5000, LARGE = 100000 + EMPTY = 0, SMALL = 100, MEDIUM = 5000, LARGE = 100000 #endif }; @@ -287,6 +287,12 @@ TEST_P(TabletDecoderEvalTest, NullableHighCardinality) { TestNullableScanAndFilter(50000, 30, 200, 75); } +TEST_P(TabletDecoderEvalTest, CompletelyNullColumn) { + // Fill a tablet with pattern [0, 50) but with all values being NULL. + // Query for values [30, 50). + TestNullableScanAndFilter(50, 30, 50, 50); +} + TEST_P(TabletDecoderEvalTest, MultipleColumns) { // Fill a tablet with pattern [0, 10) and query a:[0, 5) AND b:[3, 10). // To be considered correct, returned columns must align as they do in the @@ -294,7 +300,8 @@ TEST_P(TabletDecoderEvalTest, MultipleColumns) { TestMultipleColumnPredicates(10, 3, 5); } -INSTANTIATE_TEST_CASE_P(DecoderEvaluation, TabletDecoderEvalTest, ::testing::Values(SMALL, +INSTANTIATE_TEST_CASE_P(DecoderEvaluation, TabletDecoderEvalTest, ::testing::Values(EMPTY, + SMALL, MEDIUM, LARGE));
