Repository: kudu
Updated Branches:
  refs/heads/master e709db1f3 -> f0716cc52


KUDU-2566: Enhance rowset tree pruning and stop copying strings

Three improvements:
1) Support open-ended intervals:
   Previously, the rowset tree could only compute overlap with a
   closed and bounded interval. This changeset adds the ability to
   find overlap for unbounded intervals as well. As a result, tablet
   iterators with a single primary key bound are more efficient
   because they do not iterate over rowsets that don't overlap with
   the key bound.
2) End up fetching one more rowset for the upper bound which is
   exclusive:
   This changeset also adds the ability to query the rowset tree
   using an exclusive upper bound, whereas previously only inclusive
   intervals were supported. This makes scans more efficient since
   primary key upper bounds are passed as exclusive bounds, so now
   rowsets that overlap only in the upper bound are excluded.
3) Perf improvement:
   Using raw slices instead of copying to strings while querying.
   The copying from slices to string is discarded, which could help
   to increase the query performance.

Change-Id: I0e34b4169f93f3519f694c9bf86ca5295c318e79
Reviewed-on: http://gerrit.cloudera.org:8080/11381
Tested-by: Kudu Jenkins
Reviewed-by: Will Berkeley <wdberke...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f02a0c0f
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f02a0c0f
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f02a0c0f

Branch: refs/heads/master
Commit: f02a0c0f29b6ef5468137869764119b1a8f8b3ae
Parents: e709db1
Author: helifu <hzhel...@corp.netease.com>
Authored: Mon Sep 3 19:12:16 2018 +0800
Committer: Will Berkeley <wdberke...@gmail.com>
Committed: Tue Sep 11 16:25:48 2018 +0000

----------------------------------------------------------------------
 src/kudu/tablet/rowset_tree-test.cc | 174 +++++++++++++++++++++++++++++--
 src/kudu/tablet/rowset_tree.cc      |  32 ++++--
 src/kudu/tablet/rowset_tree.h       |  15 ++-
 src/kudu/tablet/tablet.cc           |  22 ++--
 src/kudu/util/interval_tree-inl.h   |  44 ++++----
 src/kudu/util/interval_tree-test.cc | 132 +++++++++++++++++------
 src/kudu/util/interval_tree.h       |  12 ++-
 7 files changed, 349 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/f02a0c0f/src/kudu/tablet/rowset_tree-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_tree-test.cc 
b/src/kudu/tablet/rowset_tree-test.cc
index 9cfb7f7..a248a5f 100644
--- a/src/kudu/tablet/rowset_tree-test.cc
+++ b/src/kudu/tablet/rowset_tree-test.cc
@@ -18,15 +18,19 @@
 #include <algorithm>
 #include <cstdlib>
 #include <memory>
-#include <unordered_set>
 #include <string>
+#include <string.h>
+#include <unordered_set>
+#include <utility>
 #include <vector>
 
+#include <boost/optional/optional.hpp>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/stringprintf.h"
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/tablet/mock-rowsets.h"
 #include "kudu/tablet/rowset.h"
 #include "kudu/tablet/rowset_tree.h"
@@ -39,6 +43,7 @@ using std::shared_ptr;
 using std::string;
 using std::unordered_set;
 using std::vector;
+using strings::Substitute;
 
 namespace kudu { namespace tablet {
 
@@ -86,30 +91,185 @@ TEST_F(TestRowSetTree, TestTree) {
   ASSERT_EQ(vec[3].get(), out[0]); // MemRowSet
   ASSERT_EQ(vec[0].get(), out[1]);
   ASSERT_EQ(vec[1].get(), out[2]);
+
+  // interval [3,4) overlaps 0-5, 3-5 and the MemRowSet
+  out.clear();
+  tree.FindRowSetsIntersectingInterval(Slice("3"), Slice("4"), &out);
+  ASSERT_EQ(3, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]);
+  ASSERT_EQ(vec[0].get(), out[1]);
+  ASSERT_EQ(vec[1].get(), out[2]);
+
+  // interval [0,2) overlaps 0-5 and the MemRowSet
+  out.clear();
+  tree.FindRowSetsIntersectingInterval(Slice("0"), Slice("2"), &out);
+  ASSERT_EQ(2, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]);
+  ASSERT_EQ(vec[0].get(), out[1]);
+
+  // interval [5,7) overlaps 0-5, 3-5, 5-9 and the MemRowSet
+  out.clear();
+  tree.FindRowSetsIntersectingInterval(Slice("5"), Slice("7"), &out);
+  ASSERT_EQ(4, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]);
+  ASSERT_EQ(vec[0].get(), out[1]);
+  ASSERT_EQ(vec[1].get(), out[2]);
+  ASSERT_EQ(vec[2].get(), out[3]);
+
+  // "3" overlaps 0-5, 3-5, and the MemRowSet.
+  out.clear();
+  tree.FindRowSetsWithKeyInRange("3", &out);
+  ASSERT_EQ(3, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]); // MemRowSet
+  ASSERT_EQ(vec[0].get(), out[1]);
+  ASSERT_EQ(vec[1].get(), out[2]);
 
-  // interval (2,4) overlaps 0-5, 3-5 and the MemRowSet
+  // "5" overlaps 0-5, 3-5, 5-9, and the MemRowSet
+  out.clear();
+  tree.FindRowSetsWithKeyInRange("5", &out);
+  ASSERT_EQ(4, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]); // MemRowSet
+  ASSERT_EQ(vec[0].get(), out[1]);
+  ASSERT_EQ(vec[1].get(), out[2]);
+  ASSERT_EQ(vec[2].get(), out[3]);
+
+  // interval [0,5) overlaps 0-5, 3-5, and the MemRowSet
+  out.clear();
+  tree.FindRowSetsIntersectingInterval(Slice("0"), Slice("5"), &out);
+  ASSERT_EQ(3, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]);
+  ASSERT_EQ(vec[0].get(), out[1]);
+  ASSERT_EQ(vec[1].get(), out[2]);
+
+  // interval [3,5) overlaps 0-5, 3-5 and the MemRowSet
   out.clear();
-  tree.FindRowSetsIntersectingInterval("3", "4", &out);
+  tree.FindRowSetsIntersectingInterval(Slice("3"), Slice("5"), &out);
   ASSERT_EQ(3, out.size());
   ASSERT_EQ(vec[3].get(), out[0]);
   ASSERT_EQ(vec[0].get(), out[1]);
   ASSERT_EQ(vec[1].get(), out[2]);
 
-  // interval (0,2) overlaps 0-5 and the MemRowSet
+  // interval [-OO,3) overlaps 0-5 and the MemRowSet
   out.clear();
-  tree.FindRowSetsIntersectingInterval("0", "2", &out);
+  tree.FindRowSetsIntersectingInterval(boost::none, Slice("3"), &out);
   ASSERT_EQ(2, out.size());
   ASSERT_EQ(vec[3].get(), out[0]);
   ASSERT_EQ(vec[0].get(), out[1]);
 
-  // interval (5,7) overlaps 0-5, 3-5, 5-9 and the MemRowSet
+  // interval [-OO,5) overlaps 0-5, 3-5 and the MemRowSet
+  out.clear();
+  tree.FindRowSetsIntersectingInterval(boost::none, Slice("5"), &out);
+  ASSERT_EQ(3, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]);
+  ASSERT_EQ(vec[0].get(), out[1]);
+  ASSERT_EQ(vec[1].get(), out[2]);
+
+  // interval [-OO,99) overlaps 0-5, 3-5, 5-9 and the MemRowSet
   out.clear();
-  tree.FindRowSetsIntersectingInterval("5", "7", &out);
+  tree.FindRowSetsIntersectingInterval(boost::none, Slice("99"), &out);
   ASSERT_EQ(4, out.size());
   ASSERT_EQ(vec[3].get(), out[0]);
   ASSERT_EQ(vec[0].get(), out[1]);
   ASSERT_EQ(vec[1].get(), out[2]);
   ASSERT_EQ(vec[2].get(), out[3]);
+
+  // interval [6,+OO) overlaps 5-9 and the MemRowSet
+  out.clear();
+  tree.FindRowSetsIntersectingInterval(Slice("6"), boost::none, &out);
+  ASSERT_EQ(2, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]);
+  ASSERT_EQ(vec[2].get(), out[1]);
+
+  // interval [5,+OO) overlaps 0-5, 3-5, 5-9 and the MemRowSet
+  out.clear();
+  tree.FindRowSetsIntersectingInterval(Slice("5"), boost::none, &out);
+  ASSERT_EQ(4, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]);
+  ASSERT_EQ(vec[0].get(), out[1]);
+  ASSERT_EQ(vec[1].get(), out[2]);
+  ASSERT_EQ(vec[2].get(), out[3]);
+
+  // interval [4,+OO) overlaps 0-5, 3-5, 5-9 and the MemRowSet
+  out.clear();
+  tree.FindRowSetsIntersectingInterval(Slice("4"), boost::none, &out);
+  ASSERT_EQ(4, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]);
+  ASSERT_EQ(vec[0].get(), out[1]);
+  ASSERT_EQ(vec[1].get(), out[2]);
+  ASSERT_EQ(vec[2].get(), out[3]);
+
+  // interval [-OO,+OO) overlaps 0-5, 3-5, 5-9 and the MemRowSet
+  out.clear();
+  tree.FindRowSetsIntersectingInterval(boost::none, boost::none, &out);
+  ASSERT_EQ(4, out.size());
+  ASSERT_EQ(vec[3].get(), out[0]);
+  ASSERT_EQ(vec[0].get(), out[1]);
+  ASSERT_EQ(vec[1].get(), out[2]);
+  ASSERT_EQ(vec[2].get(), out[3]);
+}
+
+TEST_F(TestRowSetTree, TestTreeRandomized) {
+  enum BoundOperator {
+    BOUND_LESS_THAN,
+    BOUND_LESS_EQUAL,
+    BOUND_GREATER_THAN,
+    BOUND_GREATER_EQUAL,
+    BOUND_EQUAL
+  };
+  const auto& GetStringPair = [] (const BoundOperator op) -> std::pair<string, 
string> {
+    while (true) {
+      string s1 = Substitute("$0", rand() % 100);
+      string s2 = Substitute("$0", rand() % 100);
+      int r = strcmp(s1.c_str(), s2.c_str());
+      switch (op) {
+      case BOUND_LESS_THAN:
+        if (r == 0) continue; // pass through.
+      case BOUND_LESS_EQUAL:
+        return std::pair<string, string>(std::min(s1, s2), std::max(s1, s2));
+      case BOUND_GREATER_THAN:
+        if (r == 0) continue; // pass through.
+      case BOUND_GREATER_EQUAL:
+        return std::pair<string, string>(std::max(s1, s2), std::min(s1, s2));
+      case BOUND_EQUAL:
+        return std::pair<string, string>(s1, s1);
+      }
+    }
+  };
+
+  SeedRandom();
+  RowSetVector vec;
+  for (int i = 0; i < 100; i++) {
+    std::pair<string, string> bound = GetStringPair(BOUND_LESS_EQUAL);
+    ASSERT_LE(bound.first, bound.second);
+    vec.push_back(shared_ptr<RowSet>(
+        new MockDiskRowSet(bound.first, bound.second)));
+  }
+  RowSetTree tree;
+  ASSERT_OK(tree.Reset(vec));
+
+  // When lower < upper.
+  vector<RowSet*> out;
+  for (int i = 0; i < 100; i++) {
+    out.clear();
+    std::pair<string, string> bound = GetStringPair(BOUND_LESS_THAN);
+    ASSERT_LT(bound.first, bound.second);
+    tree.FindRowSetsIntersectingInterval(
+        Slice(bound.first), Slice(bound.second), &out);
+    for (const auto& e : out) {
+      string min, max;
+      e->GetBounds(&min, &max);
+      if (min < bound.first) {
+        ASSERT_GE(max, bound.first);
+      } else {
+        ASSERT_LT(min, bound.second);
+      }
+      if (max >= bound.second) {
+        ASSERT_LT(min, bound.second);
+      } else {
+        ASSERT_GE(max, bound.first);
+      }
+    }
+  }
 }
 
 class TestRowSetTreePerformance : public TestRowSetTree,

http://git-wip-us.apache.org/repos/asf/kudu/blob/f02a0c0f/src/kudu/tablet/rowset_tree.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_tree.cc b/src/kudu/tablet/rowset_tree.cc
index 829477f..eceb631 100644
--- a/src/kudu/tablet/rowset_tree.cc
+++ b/src/kudu/tablet/rowset_tree.cc
@@ -101,6 +101,24 @@ struct RowSetIntervalTraits {
     return -compare(b, a);
   }
 
+  // When 'a' is boost::none:
+  //  (1)'a' is +OO when 'positive_direction' is true;
+  //  (2)'a' is -OO when 'positive_direction' is false.
+  static int compare(const boost::optional<Slice>& a,
+                     const Slice& b,
+                     const EndpointIfNone& type) {
+    if (a == boost::none) {
+      return ((POSITIVE_INFINITY == type) ? 1 : -1);
+    }
+
+    return compare(*a, b);
+  }
+
+  static int compare(const Slice& a,
+                     const boost::optional<Slice>& b,
+                     const EndpointIfNone& type) {
+    return -compare(b, a, type);
+  }
 };
 
 RowSetTree::RowSetTree()
@@ -173,9 +191,9 @@ Status RowSetTree::Reset(const RowSetVector &rowsets) {
   return Status::OK();
 }
 
-void RowSetTree::FindRowSetsIntersectingInterval(const Slice &lower_bound,
-                                                 const Slice &upper_bound,
-                                                 vector<RowSet *> *rowsets) 
const {
+void RowSetTree::FindRowSetsIntersectingInterval(const boost::optional<Slice>& 
lower_bound,
+                                                 const boost::optional<Slice>& 
upper_bound,
+                                                 vector<RowSet*>* rowsets) 
const {
   DCHECK(initted_);
 
   // All rowsets with unknown bounds need to be checked.
@@ -183,15 +201,9 @@ void RowSetTree::FindRowSetsIntersectingInterval(const 
Slice &lower_bound,
     rowsets->push_back(rs.get());
   }
 
-  // perf TODO: make it possible to query using raw Slices
-  // instead of copying to strings here
-  RowSetWithBounds query;
-  query.min_key = lower_bound.ToString();
-  query.max_key = upper_bound.ToString();
-
   vector<RowSetWithBounds *> from_tree;
   from_tree.reserve(all_rowsets_.size());
-  tree_->FindIntersectingInterval(&query, &from_tree);
+  tree_->FindIntersectingInterval(lower_bound, upper_bound, &from_tree);
   rowsets->reserve(rowsets->size() + from_tree.size());
   for (RowSetWithBounds *rs : from_tree) {
     rowsets->push_back(rs->rowset);

http://git-wip-us.apache.org/repos/asf/kudu/blob/f02a0c0f/src/kudu/tablet/rowset_tree.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_tree.h b/src/kudu/tablet/rowset_tree.h
index cc4adc5..7c566de 100644
--- a/src/kudu/tablet/rowset_tree.h
+++ b/src/kudu/tablet/rowset_tree.h
@@ -22,6 +22,8 @@
 #include <unordered_map>
 #include <vector>
 
+#include <boost/optional/optional.hpp>
+
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/tablet/rowset.h"
@@ -85,9 +87,16 @@ class RowSetTree {
   void ForEachRowSetContainingKeys(const std::vector<Slice>& encoded_keys,
                                    const std::function<void(RowSet*, int)>& 
cb) const;
 
-  void FindRowSetsIntersectingInterval(const Slice &lower_bound,
-                                       const Slice &upper_bound,
-                                       std::vector<RowSet *> *rowsets) const;
+  // When 'lower_bound' is boost::none, it means negative infinity.
+  // When 'upper_bound' is boost::none, it means positive infinity.
+  // So the query interval can be one of below:
+  //  [-OO, +OO)
+  //  [-OO, upper_bound)
+  //  [lower_bound, +OO)
+  //  [lower_bound, upper_bound)
+  void FindRowSetsIntersectingInterval(const boost::optional<Slice>& 
lower_bound,
+                                       const boost::optional<Slice>& 
upper_bound,
+                                       std::vector<RowSet*>* rowsets) const;
 
   const RowSetVector &all_rowsets() const { return all_rowsets_; }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/f02a0c0f/src/kudu/tablet/tablet.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index 7a44b91..b81f351 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -30,6 +30,7 @@
 #include <utility>
 #include <vector>
 
+#include <boost/optional/optional.hpp>
 #include <gflags/gflags.h>
 #include <glog/logging.h>
 
@@ -1778,16 +1779,13 @@ Status Tablet::CaptureConsistentIterators(
   opts.io_context = io_context;
 
   // Cull row-sets in the case of key-range queries.
-  if (spec != nullptr && spec->lower_bound_key() && 
spec->exclusive_upper_bound_key()) {
-    // TODO(todd): support open-ended intervals
-    // TODO(todd): the upper bound key is exclusive, but the RowSetTree
-    // function takes an inclusive interval. So, we might end up fetching one
-    // more rowset than necessary.
-    vector<RowSet *> interval_sets;
-    components_->rowsets->FindRowSetsIntersectingInterval(
-        spec->lower_bound_key()->encoded_key(),
-        spec->exclusive_upper_bound_key()->encoded_key(),
-        &interval_sets);
+  if (spec != nullptr && (spec->lower_bound_key() || 
spec->exclusive_upper_bound_key())) {
+    boost::optional<Slice> lower_bound = spec->lower_bound_key() ? \
+        boost::optional<Slice>(spec->lower_bound_key()->encoded_key()) : 
boost::none;
+    boost::optional<Slice> upper_bound = spec->exclusive_upper_bound_key() ? \
+        
boost::optional<Slice>(spec->exclusive_upper_bound_key()->encoded_key()) : 
boost::none;
+    vector<RowSet*> interval_sets;
+    components_->rowsets->FindRowSetsIntersectingInterval(lower_bound, 
upper_bound, &interval_sets);
     for (const RowSet *rs : interval_sets) {
       gscoped_ptr<RowwiseIterator> row_it;
       RETURN_NOT_OK_PREPEND(rs->NewRowIterator(opts, &row_it),
@@ -1799,8 +1797,8 @@ Status Tablet::CaptureConsistentIterators(
     return Status::OK();
   }
 
-  // If there are no encoded predicates or they represent an open-ended range, 
then
-  // fall back to grabbing all rowset iterators
+  // If there are no encoded predicates of the primary keys, then
+  // fall back to grabbing all rowset iterators.
   for (const shared_ptr<RowSet> &rs : components_->rowsets->all_rowsets()) {
     gscoped_ptr<RowwiseIterator> row_it;
     RETURN_NOT_OK_PREPEND(rs->NewRowIterator(opts, &row_it),

http://git-wip-us.apache.org/repos/asf/kudu/blob/f02a0c0f/src/kudu/util/interval_tree-inl.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/interval_tree-inl.h 
b/src/kudu/util/interval_tree-inl.h
index 7637317..a4fa1e5 100644
--- a/src/kudu/util/interval_tree-inl.h
+++ b/src/kudu/util/interval_tree-inl.h
@@ -58,10 +58,12 @@ void IntervalTree<Traits>::ForEachIntervalContainingPoints(
 
 
 template<class Traits>
-void IntervalTree<Traits>::FindIntersectingInterval(const interval_type &query,
-                                                    IntervalVector *results) 
const {
+template<class QueryPointType>
+void IntervalTree<Traits>::FindIntersectingInterval(const QueryPointType& 
lower_bound,
+                                                    const QueryPointType& 
upper_bound,
+                                                    IntervalVector* results) 
const {
   if (root_) {
-    root_->FindIntersectingInterval(query, results);
+    root_->FindIntersectingInterval(lower_bound, upper_bound, results);
   }
 }
 
@@ -178,8 +180,10 @@ class ITNode {
                                        const Callback& cb) const;
 
   // See IntervalTree::FindIntersectingInterval(...)
-  void FindIntersectingInterval(const interval_type &query,
-                                IntervalVector *results) const;
+  template<class QueryPointType>
+  void FindIntersectingInterval(const QueryPointType& lower_bound,
+                                const QueryPointType& upper_bound,
+                                IntervalVector* results) const;
 
  private:
   // Comparators for sorting lists of intervals.
@@ -384,40 +388,42 @@ void ITNode<Traits>::FindContainingPoint(const 
QueryPointType &query,
 }
 
 template<class Traits>
-void ITNode<Traits>::FindIntersectingInterval(const interval_type &query,
-                                              IntervalVector *results) const {
-  if (Traits::compare(Traits::get_right(query), split_point_) < 0) {
-    // The interval is fully left of the split point. So, it may not overlap
-    // with any in 'right_'
+template<class QueryPointType>
+void ITNode<Traits>::FindIntersectingInterval(const QueryPointType& 
lower_bound,
+                                              const QueryPointType& 
upper_bound,
+                                              IntervalVector* results) const {
+  if (Traits::compare(upper_bound, split_point_, POSITIVE_INFINITY) <= 0) {
+    // The interval is fully left of the split point and with split point.
+    // So, it may not overlap with any in 'right_'
     if (left_ != NULL) {
-      left_->FindIntersectingInterval(query, results);
+      left_->FindIntersectingInterval(lower_bound, upper_bound, results);
     }
 
-    // Any intervals whose left edge is <= the query interval's right edge
+    // Any interval whose left edge is < the query interval's right edge
     // intersect the query interval. 'std::partition_point' returns the first
     // such interval which does not meet that criterion, so we insert all
     // up to that point.
     auto first_greater = std::partition_point(
         overlapping_by_asc_left_.cbegin(), overlapping_by_asc_left_.cend(),
         [&](const interval_type& interval) {
-          return Traits::compare(Traits::get_left(interval), 
Traits::get_right(query)) <= 0;
+          return Traits::compare(Traits::get_left(interval), upper_bound, 
POSITIVE_INFINITY) < 0;
         });
     results->insert(results->end(), overlapping_by_asc_left_.cbegin(), 
first_greater);
-  } else if (Traits::compare(Traits::get_left(query), split_point_) > 0) {
+  } else if (Traits::compare(lower_bound, split_point_, NEGATIVE_INFINITY) > 
0) {
     // The interval is fully right of the split point. So, it may not overlap
     // with any in 'left_'.
     if (right_ != NULL) {
-      right_->FindIntersectingInterval(query, results);
+      right_->FindIntersectingInterval(lower_bound, upper_bound, results);
     }
 
-    // Any intervals whose right edge is >= the query interval's left edge
+    // Any interval whose right edge is >= the query interval's left edge
     // intersect the query interval. 'std::partition_point' returns the first
     // such interval which does not meet that criterion, so we insert all
     // up to that point.
     auto first_lesser = std::partition_point(
         overlapping_by_desc_right_.cbegin(), overlapping_by_desc_right_.cend(),
         [&](const interval_type& interval) {
-          return Traits::compare(Traits::get_right(interval), 
Traits::get_left(query)) >= 0;
+          return Traits::compare(Traits::get_right(interval), lower_bound, 
NEGATIVE_INFINITY) >= 0;
         });
     results->insert(results->end(), overlapping_by_desc_right_.cbegin(), 
first_lesser);
   } else {
@@ -428,10 +434,10 @@ void ITNode<Traits>::FindIntersectingInterval(const 
interval_type &query,
 
     // The query interval may _also_ intersect some in either child.
     if (left_ != NULL) {
-      left_->FindIntersectingInterval(query, results);
+      left_->FindIntersectingInterval(lower_bound, upper_bound, results);
     }
     if (right_ != NULL) {
-      right_->FindIntersectingInterval(query, results);
+      right_->FindIntersectingInterval(lower_bound, upper_bound, results);
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/f02a0c0f/src/kudu/util/interval_tree-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/interval_tree-test.cc 
b/src/kudu/util/interval_tree-test.cc
index df143d3..a8c17e0 100644
--- a/src/kudu/util/interval_tree-test.cc
+++ b/src/kudu/util/interval_tree-test.cc
@@ -56,9 +56,35 @@ struct IntInterval {
         id(id) {
   }
 
-  bool Intersects(const IntInterval &other) const {
-    if (other.left > right) return false;
-    if (left > other.right) return false;
+  // boost::none means infinity.
+  // [left,  right] is closed interval.
+  // [lower, upper) is half-open interval, so the upper is exclusive.
+  bool Intersects(const boost::optional<int>& lower,
+                  const boost::optional<int>& upper) const {
+    if (lower == boost::none && upper == boost::none) {
+      //         [left, right]
+      //            |     |
+      // [-OO,                      +OO)
+    } else if (lower == boost::none) {
+      //         [left, right]
+      //            |
+      // [-OO,    upper)
+      if (*upper <= this->left) return false;
+    } else if (upper == boost::none) {
+      //         [left, right]
+      //                     \
+      //                      [lower, +OO)
+      if (*lower > this->right) return false;
+    } else {
+      //         [left, right]
+      //                     \
+      //                      [lower, upper)
+      if (*lower > this->right) return false;
+      //         [left, right]
+      //            |
+      // [lower,  upper)
+      if (*upper <= this->left) return false;
+    }
     return true;
   }
 
@@ -86,10 +112,10 @@ struct CountingQueryPoint {
 struct IntTraits {
   typedef int point_type;
   typedef IntInterval interval_type;
-  static point_type get_left(const IntInterval &x) {
+  static point_type get_left(const IntInterval& x) {
     return x.left;
   }
-  static point_type get_right(const IntInterval &x) {
+  static point_type get_right(const IntInterval& x) {
     return x.right;
   }
   static int compare(int a, int b) {
@@ -106,18 +132,33 @@ struct IntTraits {
     return -compare(b, a);
   }
 
+  static int compare(const boost::optional<int>& a,
+                     const int b,
+                     const EndpointIfNone& type) {
+    if (a == boost::none) {
+      return ((POSITIVE_INFINITY == type) ? 1 : -1);
+    }
+
+    return compare(*a, b);
+  }
+
+  static int compare(const int a,
+                     const boost::optional<int>& b,
+                     const EndpointIfNone& type) {
+    return -compare(b, a, type);
+  }
 };
 
 // Compare intervals in an arbitrary but consistent way - this is only
 // used for verifying that the two algorithms come up with the same results.
 // It's not necessary to define this to use an interval tree.
-static bool CompareIntervals(const IntInterval &a, const IntInterval &b) {
+static bool CompareIntervals(const IntInterval& a, const IntInterval& b) {
   return std::make_tuple(a.left, a.right, a.id) <
     std::make_tuple(b.left, b.right, b.id);
 }
 
 // Stringify a list of int intervals, for easy test error reporting.
-static string Stringify(const vector<IntInterval> &intervals) {
+static string Stringify(const vector<IntInterval>& intervals) {
   string ret;
   bool first = true;
   for (const IntInterval &interval : intervals) {
@@ -130,9 +171,9 @@ static string Stringify(const vector<IntInterval> 
&intervals) {
 }
 
 // Find any intervals in 'intervals' which contain 'query_point' by brute 
force.
-static void FindContainingBruteForce(const vector<IntInterval> &intervals,
+static void FindContainingBruteForce(const vector<IntInterval>& intervals,
                                      int query_point,
-                                     vector<IntInterval> *results) {
+                                     vector<IntInterval>* results) {
   for (const IntInterval &i : intervals) {
     if (query_point >= i.left && query_point <= i.right) {
       results->push_back(i);
@@ -142,11 +183,12 @@ static void FindContainingBruteForce(const 
vector<IntInterval> &intervals,
 
 
 // Find any intervals in 'intervals' which intersect 'query_interval' by brute 
force.
-static void FindIntersectingBruteForce(const vector<IntInterval> &intervals,
-                                       IntInterval query_interval,
-                                       vector<IntInterval> *results) {
-  for (const IntInterval &i : intervals) {
-    if (query_interval.Intersects(i)) {
+static void FindIntersectingBruteForce(const vector<IntInterval>& intervals,
+                                       const boost::optional<int>& lower,
+                                       const boost::optional<int>& upper,
+                                       vector<IntInterval>* results) {
+  for (const IntInterval& i : intervals) {
+    if (i.Intersects(lower, upper)) {
       results->push_back(i);
     }
   }
@@ -155,8 +197,8 @@ static void FindIntersectingBruteForce(const 
vector<IntInterval> &intervals,
 
 // Verify that IntervalTree::FindContainingPoint yields the same results as 
the naive
 // brute-force O(n) algorithm.
-static void VerifyFindContainingPoint(const vector<IntInterval> all_intervals,
-                                      const IntervalTree<IntTraits> &tree,
+static void VerifyFindContainingPoint(const vector<IntInterval>& all_intervals,
+                                      const IntervalTree<IntTraits>& tree,
                                       int query_point) {
   vector<IntInterval> results;
   tree.FindContainingPoint(query_point, &results);
@@ -166,26 +208,58 @@ static void VerifyFindContainingPoint(const 
vector<IntInterval> all_intervals,
   FindContainingBruteForce(all_intervals, query_point, &brute_force);
   std::sort(brute_force.begin(), brute_force.end(), CompareIntervals);
 
-  SCOPED_TRACE(Stringify(all_intervals) + StringPrintf(" (q=%d)", 
query_point));
+  SCOPED_TRACE(Stringify(all_intervals) + StringPrintf(" {q=%d}", 
query_point));
   EXPECT_EQ(Stringify(brute_force), Stringify(results));
 }
 
 // Verify that IntervalTree::FindIntersectingInterval yields the same results 
as the naive
 // brute-force O(n) algorithm.
-static void VerifyFindIntersectingInterval(const vector<IntInterval> 
all_intervals,
-                                           const IntervalTree<IntTraits> &tree,
-                                           const IntInterval &query_interval) {
-  vector<IntInterval> results;
-  tree.FindIntersectingInterval(query_interval, &results);
-  std::sort(results.begin(), results.end(), CompareIntervals);
+static void VerifyFindIntersectingInterval(const vector<IntInterval>& 
all_intervals,
+                                           const IntervalTree<IntTraits>& tree,
+                                           const IntInterval& query_interval) {
+  const auto& Process = [&] (const boost::optional<int>& lower,
+                             const boost::optional<int>& upper) {
+    vector<IntInterval> results;
+    tree.FindIntersectingInterval(lower, upper, &results);
+    std::sort(results.begin(), results.end(), CompareIntervals);
+
+    vector<IntInterval> brute_force;
+    FindIntersectingBruteForce(all_intervals, lower, upper, &brute_force);
+    std::sort(brute_force.begin(), brute_force.end(), CompareIntervals);
+    EXPECT_EQ(Stringify(brute_force), Stringify(results));
+  };
+
+  {
+    // [lower, upper)
+    boost::optional<int> lower = query_interval.left;
+    boost::optional<int> upper = query_interval.right;
+    SCOPED_TRACE(Stringify(all_intervals) + StringPrintf(" {q=[%d, %d)}", 
*lower, *upper));
+    Process(lower, upper);
+  }
 
-  vector<IntInterval> brute_force;
-  FindIntersectingBruteForce(all_intervals, query_interval, &brute_force);
-  std::sort(brute_force.begin(), brute_force.end(), CompareIntervals);
+  {
+    // [-OO, upper)
+    boost::optional<int> lower = boost::none;
+    boost::optional<int> upper = query_interval.right;
+    SCOPED_TRACE(Stringify(all_intervals) + StringPrintf(" {q=[-OO, %d)}", 
*upper));
+    Process(lower, upper);
+  }
 
-  SCOPED_TRACE(Stringify(all_intervals) +
-               StringPrintf(" (q=[%d,%d])", query_interval.left, 
query_interval.right));
-  EXPECT_EQ(Stringify(brute_force), Stringify(results));
+  {
+    // [lower, +OO)
+    boost::optional<int> lower = query_interval.left;
+    boost::optional<int> upper = boost::none;
+    SCOPED_TRACE(Stringify(all_intervals) + StringPrintf(" {q=[%d, +OO)}", 
*lower));
+    Process(lower, upper);
+  }
+
+  {
+    // [-OO, +OO)
+    boost::optional<int> lower = query_interval.left;
+    boost::optional<int> upper = boost::none;
+    SCOPED_TRACE(Stringify(all_intervals) + StringPrintf(" {q=[-OO, +OO)}"));
+    Process(lower, upper);
+  }
 }
 
 static vector<IntInterval> CreateRandomIntervals(int n = 100) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/f02a0c0f/src/kudu/util/interval_tree.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/interval_tree.h b/src/kudu/util/interval_tree.h
index a677528..25373db 100644
--- a/src/kudu/util/interval_tree.h
+++ b/src/kudu/util/interval_tree.h
@@ -36,6 +36,12 @@ template<class Traits>
 class ITNode;
 }
 
+// End point type when boost::none.
+enum EndpointIfNone {
+  POSITIVE_INFINITY,
+  NEGATIVE_INFINITY
+};
+
 // Implements an Interval Tree.
 //
 // An Interval Tree is a data structure which stores a set of intervals and 
supports
@@ -135,8 +141,10 @@ class IntervalTree {
   // Find all intervals in the tree which intersect the given interval.
   // The resulting intervals are added to the 'results' vector.
   // The vector is not cleared first.
-  void FindIntersectingInterval(const interval_type &query,
-                                IntervalVector *results) const;
+  template<class QueryPointType>
+  void FindIntersectingInterval(const QueryPointType& lower_bound,
+                                const QueryPointType& upper_bound,
+                                IntervalVector* results) const;
  private:
   static void Partition(const IntervalVector &in,
                         point_type *split_point,

Reply via email to