Repository: kudu
Updated Branches:
  refs/heads/master cf67b59ec -> 2aa1b883c


KUDU-2312: Scan predicate application ordering is non-deterministic

This changes the scan predicate evaluation ordering so that it primarily
orders by selectivity (as before), but breaks ties by column index.

Change-Id: I99b2cabecd8626cad7e11fbdd492af7276e08348
Reviewed-on: http://gerrit.cloudera.org:8080/9440
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <t...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/2aa1b883
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/2aa1b883
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/2aa1b883

Branch: refs/heads/master
Commit: 2aa1b883c3a875f554d787ccebabdf7450d56a58
Parents: cf67b59
Author: Dan Burkert <danburk...@apache.org>
Authored: Tue Feb 20 11:17:48 2018 -0800
Committer: Dan Burkert <danburk...@apache.org>
Committed: Tue Mar 6 19:15:58 2018 +0000

----------------------------------------------------------------------
 docs/release_notes.adoc                   |  6 ++
 src/kudu/common/generic_iterators-test.cc | 82 +++++++++++++++++++++++++-
 src/kudu/common/generic_iterators.cc      | 34 +++++++----
 src/kudu/common/generic_iterators.h       | 14 +++--
 src/kudu/tablet/cfile_set.h               |  1 -
 5 files changed, 117 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/2aa1b883/docs/release_notes.adoc
----------------------------------------------------------------------
diff --git a/docs/release_notes.adoc b/docs/release_notes.adoc
index 9e2f2a7..c965829 100644
--- a/docs/release_notes.adoc
+++ b/docs/release_notes.adoc
@@ -57,6 +57,12 @@
 [[rn_1.7.0_fixed_issues]]
 == Fixed Issues
 
+* KUDU-2312: The evaluation order of predicates in scans with multiple
+  predicates has been made deterministic. Due to a bug, this was not 
necessarily
+  the case previously. Predicates are applied in most to least selective order,
+  with ties broken by column index. The evaluation order may change in the
+  future, particularly when better column statistics are made available
+  internally.
 
 [[rn_1.7.0_wire_compatibility]]
 == Wire Protocol compatibility

http://git-wip-us.apache.org/repos/asf/kudu/blob/2aa1b883/src/kudu/common/generic_iterators-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/generic_iterators-test.cc 
b/src/kudu/common/generic_iterators-test.cc
index e7f9250..55eaa09 100644
--- a/src/kudu/common/generic_iterators-test.cc
+++ b/src/kudu/common/generic_iterators-test.cc
@@ -51,6 +51,7 @@ DEFINE_int32(num_lists, 3, "Number of lists to merge");
 DEFINE_int32(num_rows, 1000, "Number of entries per list");
 DEFINE_int32(num_iters, 1, "Number of times to run merge");
 
+using std::make_shared;
 using std::shared_ptr;
 using std::string;
 using std::vector;
@@ -309,7 +310,7 @@ TEST(TestPredicateEvaluatingIterator, 
TestPredicateEvaluation) {
 
   ASSERT_EQ(0, spec.predicates().size())
     << "Iterator tree should have accepted predicate";
-  ASSERT_EQ(1, pred_eval->col_idx_predicates_.size())
+  ASSERT_EQ(1, pred_eval->col_predicates_.size())
     << "Predicate should be evaluated by the outer iterator";
 
   Arena arena(1024);
@@ -338,4 +339,83 @@ TEST(TestPredicateEvaluatingIterator, 
TestDontWrapWhenNoPredicates) {
   ASSERT_EQ(outer_iter, materializing) << "InitAndMaybeWrap should not have 
wrapped iter";
 }
 
+// Test row-wise iterator which does nothing.
+class DummyIterator : public RowwiseIterator {
+ public:
+
+  explicit DummyIterator(Schema schema)
+      : schema_(std::move(schema)) {
+  }
+
+  Status Init(ScanSpec* /*spec*/) override {
+    return Status::OK();
+  }
+
+  virtual Status NextBlock(RowBlock* /*dst*/) override {
+    LOG(FATAL) << "unimplemented!";
+    return Status::OK();
+  }
+
+  virtual bool HasNext() const override {
+    LOG(FATAL) << "unimplemented!";
+    return false;
+  }
+
+  virtual string ToString() const override {
+    return "DummyIterator";
+  }
+
+  virtual const Schema& schema() const override {
+    return schema_;
+  }
+
+  virtual void GetIteratorStats(vector<IteratorStats>* stats) const override {
+    stats->resize(schema().num_columns());
+  }
+
+ private:
+  Schema schema_;
+};
+
+TEST(TestPredicateEvaluatingIterator, TestPredicateEvaluationOrder) {
+  Schema schema({ ColumnSchema("a_int64", INT64),
+                  ColumnSchema("b_int64", INT64),
+                  ColumnSchema("c_int32", INT32) }, 3);
+
+  int64_t zero = 0;
+  int64_t two = 2;
+  auto a_equality = ColumnPredicate::Equality(schema.column(0), &zero);
+  auto b_equality = ColumnPredicate::Equality(schema.column(1), &zero);
+  auto c_equality = ColumnPredicate::Equality(schema.column(2), &zero);
+  auto a_range = ColumnPredicate::Range(schema.column(0), &zero, &two);
+
+  { // Test that more selective predicates come before others.
+    ScanSpec spec;
+    spec.AddPredicate(a_range);
+    spec.AddPredicate(b_equality);
+    spec.AddPredicate(c_equality);
+
+    shared_ptr<RowwiseIterator> iter = make_shared<DummyIterator>(schema);
+    ASSERT_OK(PredicateEvaluatingIterator::InitAndMaybeWrap(&iter, &spec));
+
+    PredicateEvaluatingIterator* pred_eval = 
down_cast<PredicateEvaluatingIterator*>(iter.get());
+    ASSERT_TRUE(pred_eval->col_predicates_ ==
+                vector<ColumnPredicate>({ c_equality, b_equality, a_range }));
+  }
+
+  { // Test that smaller columns come before larger ones, and ties are broken 
by idx.
+    ScanSpec spec;
+    spec.AddPredicate(b_equality);
+    spec.AddPredicate(a_equality);
+    spec.AddPredicate(c_equality);
+
+    shared_ptr<RowwiseIterator> iter = make_shared<DummyIterator>(schema);
+    ASSERT_OK(PredicateEvaluatingIterator::InitAndMaybeWrap(&iter, &spec));
+
+    PredicateEvaluatingIterator* pred_eval = 
down_cast<PredicateEvaluatingIterator*>(iter.get());
+    ASSERT_TRUE(pred_eval->col_predicates_ ==
+                vector<ColumnPredicate>({ c_equality, a_equality, b_equality 
}));
+  }
+}
+
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/2aa1b883/src/kudu/common/generic_iterators.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/generic_iterators.cc 
b/src/kudu/common/generic_iterators.cc
index 76971ac..7a26e4f 100644
--- a/src/kudu/common/generic_iterators.cc
+++ b/src/kudu/common/generic_iterators.cc
@@ -23,7 +23,6 @@
 #include <memory>
 #include <mutex>
 #include <string>
-#include <tuple>
 #include <unordered_map>
 #include <utility>
 
@@ -44,14 +43,13 @@
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/memory/arena.h"
 
-using std::all_of;
 using std::get;
 using std::move;
+using std::pair;
 using std::remove_if;
 using std::shared_ptr;
 using std::sort;
 using std::string;
-using std::tuple;
 using std::unique_ptr;
 using std::vector;
 
@@ -491,11 +489,13 @@ Status MaterializingIterator::Init(ScanSpec *spec) {
     }
   }
 
-  // Sort the predicates by selectivity so that the most selective are 
evaluated earlier.
+  // Sort the predicates by selectivity so that the most selective are 
evaluated
+  // earlier, with ties broken by the column index.
   sort(col_idx_predicates_.begin(), col_idx_predicates_.end(),
-       [] (const tuple<int32_t, ColumnPredicate>& left,
-           const tuple<int32_t, ColumnPredicate>& right) {
-         return SelectivityComparator(get<1>(left), get<1>(right));
+       [] (const pair<int32_t, ColumnPredicate>& left,
+           const pair<int32_t, ColumnPredicate>& right) {
+          int comp = SelectivityComparator(left.second, right.second);
+          return comp ? comp < 0 : left.first < right.first;
        });
 
   return Status::OK();
@@ -581,6 +581,7 @@ 
PredicateEvaluatingIterator::PredicateEvaluatingIterator(shared_ptr<RowwiseItera
 Status PredicateEvaluatingIterator::InitAndMaybeWrap(
   shared_ptr<RowwiseIterator> *base_iter, ScanSpec *spec) {
   RETURN_NOT_OK((*base_iter)->Init(spec));
+
   if (spec != nullptr && !spec->predicates().empty()) {
     // Underlying iterator did not accept all predicates. Wrap it.
     shared_ptr<RowwiseIterator> wrapper(new 
PredicateEvaluatingIterator(*base_iter));
@@ -596,15 +597,22 @@ Status PredicateEvaluatingIterator::Init(ScanSpec *spec) {
 
   // Gather any predicates that the base iterator did not pushdown, and remove
   // the predicates from the spec.
-  col_idx_predicates_.clear();
-  col_idx_predicates_.reserve(spec->predicates().size());
+  col_predicates_.clear();
+  col_predicates_.reserve(spec->predicates().size());
   for (auto& predicate : spec->predicates()) {
-    col_idx_predicates_.emplace_back(predicate.second);
+    col_predicates_.emplace_back(predicate.second);
   }
   spec->RemovePredicates();
 
-  // Sort the predicates by selectivity so that the most selective are 
evaluated earlier.
-  sort(col_idx_predicates_.begin(), col_idx_predicates_.end(), 
SelectivityComparator);
+  // Sort the predicates by selectivity so that the most selective are 
evaluated
+  // earlier, with ties broken by the column index.
+  sort(col_predicates_.begin(), col_predicates_.end(),
+       [&] (const ColumnPredicate& left, const ColumnPredicate& right) {
+          int comp = SelectivityComparator(left, right);
+          if (comp != 0) return comp < 0;
+          return schema().find_column(left.column().name())
+               < schema().find_column(right.column().name());
+       });
 
   return Status::OK();
 }
@@ -616,7 +624,7 @@ bool PredicateEvaluatingIterator::HasNext() const {
 Status PredicateEvaluatingIterator::NextBlock(RowBlock *dst) {
   RETURN_NOT_OK(base_iter_->NextBlock(dst));
 
-  for (const auto& predicate : col_idx_predicates_) {
+  for (const auto& predicate : col_predicates_) {
     int32_t col_idx = dst->schema().find_column(predicate.column().name());
     if (col_idx == Schema::kColumnNotFound) {
       return Status::InvalidArgument("Unknown column in predicate", 
predicate.ToString());

http://git-wip-us.apache.org/repos/asf/kudu/blob/2aa1b883/src/kudu/common/generic_iterators.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/generic_iterators.h 
b/src/kudu/common/generic_iterators.h
index d90562d..49ed9b5 100644
--- a/src/kudu/common/generic_iterators.h
+++ b/src/kudu/common/generic_iterators.h
@@ -23,7 +23,7 @@
 #include <memory>
 #include <ostream>
 #include <string>
-#include <tuple>
+#include <utility>
 #include <vector>
 
 #include <glog/logging.h>
@@ -203,8 +203,9 @@ class MaterializingIterator : public RowwiseIterator {
 
   std::shared_ptr<ColumnwiseIterator> iter_;
 
-  // List of (column index, predicate) in order of most to least selective.
-  std::vector<std::tuple<int32_t, ColumnPredicate>> col_idx_predicates_;
+  // List of (column index, predicate) in order of most to least selective, 
with
+  // ties broken by the index.
+  std::vector<std::pair<int32_t, ColumnPredicate>> col_idx_predicates_;
 
   // List of column indexes without predicates to materialize.
   std::vector<int32_t> non_predicate_column_indexes_;
@@ -248,17 +249,20 @@ class PredicateEvaluatingIterator : public 
RowwiseIterator {
   }
 
  private:
+
   // Construct the evaluating iterator.
   // This is only called from ::InitAndMaybeWrap()
   // REQUIRES: base_iter is already Init()ed.
   explicit PredicateEvaluatingIterator(std::shared_ptr<RowwiseIterator> 
base_iter);
 
   FRIEND_TEST(TestPredicateEvaluatingIterator, TestPredicateEvaluation);
+  FRIEND_TEST(TestPredicateEvaluatingIterator, TestPredicateEvaluationOrder);
 
   std::shared_ptr<RowwiseIterator> base_iter_;
 
-  // List of (column index, predicate) in order of most to least selective.
-  std::vector<ColumnPredicate> col_idx_predicates_;
+  // List of predicates in order of most to least selective, with
+  // ties broken by the column index.
+  std::vector<ColumnPredicate> col_predicates_;
 };
 
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/2aa1b883/src/kudu/tablet/cfile_set.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/cfile_set.h b/src/kudu/tablet/cfile_set.h
index cfd28d2..e107c21 100644
--- a/src/kudu/tablet/cfile_set.h
+++ b/src/kudu/tablet/cfile_set.h
@@ -120,7 +120,6 @@ class CFileSet : public 
std::enable_shared_from_this<CFileSet> {
 
  private:
   friend class Iterator;
-  friend class CFileSetIteratorProjector;
 
   DISALLOW_COPY_AND_ASSIGN(CFileSet);
 

Reply via email to