This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 3f52d9ae0bde7bacf61d52ea82a898e035d4402e
Author: Todd Lipcon <[email protected]>
AuthorDate: Tue Dec 10 09:43:34 2019 -0800

    memrowset: small optimizations for scanning
    
    This adds two small optimizations for MRS/CBTree. They should have a
    small (maybe not noticeable) effect on user scans, but hopefully will
    also improve the speed of flushing by a few percent, which I found to be
    CPU bound when using an NVM disk for storage.
    
    - add a new getter to fetch the stored values without also accessing the
      stored keys. In most cases we don't care about the encoded key when
      accessing MRS, so we can avoid potentially following the key pointer
      (should avoid a cache miss)
    
    Shows a small effect in the modified benchmark:
    
    I1210 10:04:46.209149 15145 cbtree-test.cc:795] Time spent Scan 4000000 
keys 10 times (frozen): real 0.638s user 0.636s     sys 0.000s
    I1210 10:04:46.804275 15145 cbtree-test.cc:806] Time spent Scan 4000000 
keys 10 times (frozen, val-only): real 0.595s       user 0.596s     sys 0.000s
    
    ... though this one didn't show a major effect in memrowset-test on its
    own.
    
    - add prefetching of the MRS values during scanning
    
    Tested with:
    
      memrowset-test --gtest_filter=\*InsertCount\* 
--roundtrip_num_rows=10000000 --gtest_repeat=10
    
    t-test for the "all committed" result shows a significant (though <5%) 
effect:
    
    data:  subset(d, V1 == "before")$V2 and subset(d, V1 == "after-prefetch")$V2
    t = 3.4999, df = 18, p-value = 0.002557
    alternative hypothesis: true difference in means is not equal to 0
    95 percent confidence interval:
     0.004876512 0.019523488
    sample estimates:
    mean of x mean of y
       0.3622    0.3500
    
    Change-Id: Ia44b34606439625fbbbcc83e3652455a8894a0b3
    Reviewed-on: http://gerrit.cloudera.org:8080/14874
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <[email protected]>
    Reviewed-by: Bankim Bhavsar <[email protected]>
---
 src/kudu/tablet/cbtree-test.cc     | 82 ++++++++++++++++++++++++++------------
 src/kudu/tablet/concurrent_btree.h | 23 ++++++++++-
 src/kudu/tablet/memrowset.cc       |  7 ++--
 src/kudu/tablet/memrowset.h        |  7 ++--
 4 files changed, 83 insertions(+), 36 deletions(-)

diff --git a/src/kudu/tablet/cbtree-test.cc b/src/kudu/tablet/cbtree-test.cc
index 9231a6c..9352af6 100644
--- a/src/kudu/tablet/cbtree-test.cc
+++ b/src/kudu/tablet/cbtree-test.cc
@@ -15,9 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <cstdint>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <memory>
 #include <string>
 #include <thread>
 #include <unordered_set>
@@ -26,7 +28,6 @@
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
-#include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/stringprintf.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/tablet/concurrent_btree.h"
@@ -42,6 +43,7 @@
 
 using std::string;
 using std::thread;
+using std::unique_ptr;
 using std::unordered_set;
 using std::vector;
 using strings::Substitute;
@@ -259,7 +261,7 @@ void VerifyRange(const CBTree<T> &tree,
 template<class T>
 void InsertAndVerify(Barrier *go_barrier,
                      Barrier *done_barrier,
-                     gscoped_ptr<CBTree<T> > *tree,
+                     unique_ptr<CBTree<T>> *tree,
                      int start_idx,
                      int end_idx) {
   while (true) {
@@ -434,7 +436,7 @@ TEST_F(TestCBTree, TestRacyConcurrentInsert) {
 
 template<class TraitsClass>
 void TestCBTree::DoTestConcurrentInsert() {
-  gscoped_ptr<CBTree<TraitsClass> > tree;
+  unique_ptr<CBTree<TraitsClass>> tree;
 
   int num_threads = 16;
   int ins_per_thread = 30;
@@ -494,7 +496,7 @@ TEST_F(TestCBTree, TestIterator) {
   // now iterate through, making sure we saw all
   // the keys that were inserted
   LOG_TIMING(INFO, "Iterating") {
-    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(
+    unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(
       t.NewIterator());
     bool exact;
     ASSERT_TRUE(iter->SeekAtOrAfter(Slice(""), &exact));
@@ -528,7 +530,7 @@ TEST_F(TestCBTree, TestIteratorRewind) {
   ASSERT_TRUE(t.Insert(Slice("key2"), Slice("val")));
   ASSERT_TRUE(t.Insert(Slice("key3"), Slice("val")));
 
-  gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(t.NewIterator());
+  unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(t.NewIterator());
   bool exact;
   ASSERT_TRUE(iter->SeekAtOrAfter(Slice(""), &exact));
 
@@ -568,7 +570,7 @@ TEST_F(TestCBTree, TestIteratorRewind) {
 TEST_F(TestCBTree, TestIteratorSeekOnEmptyTree) {
   CBTree<SmallFanoutTraits> t;
 
-  gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(
+  unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(
     t.NewIterator());
   bool exact = true;
   ASSERT_FALSE(iter->SeekAtOrAfter(Slice(""), &exact));
@@ -587,7 +589,7 @@ TEST_F(TestCBTree, TestIteratorSeekConditions) {
 
   // Seek to before first key should successfully reach first key
   {
-    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(
+    unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(
       t.NewIterator());
 
     bool exact;
@@ -603,7 +605,7 @@ TEST_F(TestCBTree, TestIteratorSeekConditions) {
   // Seek to exactly first key should successfully reach first key
   // and set exact = true
   {
-    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(
+    unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(
       t.NewIterator());
 
     bool exact;
@@ -619,7 +621,7 @@ TEST_F(TestCBTree, TestIteratorSeekConditions) {
   // Seek to exactly last key should successfully reach last key
   // and set exact = true
   {
-    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(
+    unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(
       t.NewIterator());
 
     bool exact;
@@ -635,7 +637,7 @@ TEST_F(TestCBTree, TestIteratorSeekConditions) {
 
   // Seek to after last key should fail.
   {
-    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(
+    unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(
       t.NewIterator());
 
     bool exact;
@@ -653,7 +655,7 @@ TEST_F(TestCBTree, TestIteratorSeekConditions) {
 template<class T>
 static void ScanThread(Barrier *go_barrier,
                        Barrier *done_barrier,
-                       gscoped_ptr<CBTree<T> > *tree) {
+                       unique_ptr<CBTree<T>> *tree) {
   while (true) {
     go_barrier->Wait();
     if (tree->get() == nullptr) return;
@@ -666,7 +668,7 @@ static void ScanThread(Barrier *go_barrier,
 
       faststring prev_key;
 
-      gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > 
iter((*tree)->NewIterator());
+      unique_ptr<CBTreeIterator<SmallFanoutTraits>> 
iter((*tree)->NewIterator());
       bool exact;
       iter->SeekAtOrAfter(Slice(""), &exact);
       while (iter->IsValid()) {
@@ -693,7 +695,7 @@ static void ScanThread(Barrier *go_barrier,
 // other threads repeatedly scan and verify that the results come back
 // in order.
 TEST_F(TestCBTree, TestConcurrentIterateAndInsert) {
-  gscoped_ptr<CBTree<SmallFanoutTraits> > tree;
+  unique_ptr<CBTree<SmallFanoutTraits>> tree;
 
   int num_ins_threads = 4;
   int num_scan_threads = 4;
@@ -749,6 +751,27 @@ TEST_F(TestCBTree, TestConcurrentIterateAndInsert) {
   }
 }
 
+template<bool VAL_ONLY>
+void DoScan(CBTree<BTreeTraits>* tree, int64_t* count, int64_t* total_len) {
+  unique_ptr<CBTreeIterator<BTreeTraits>> iter(
+      tree->NewIterator());
+  bool exact;
+  iter->SeekAtOrAfter(Slice(""), &exact);
+  while (iter->IsValid()) {
+    (*count)++;
+    Slice v;
+    if (VAL_ONLY) {
+      v = iter->GetCurrentValue();
+    } else {
+      Slice dummy;
+      iter->GetCurrentEntry(&dummy, &v);
+    }
+
+    *total_len += v.size();
+    iter->Next();
+  }
+}
+
 // Check the performance of scanning through a large tree.
 TEST_F(TestCBTree, TestScanPerformance) {
   CBTree<BTreeTraits> tree;
@@ -773,16 +796,23 @@ TEST_F(TestCBTree, TestScanPerformance) {
                                   n_keys, scan_trials,
                                   freeze ? "frozen" : "not frozen")) {
       for (int i = 0; i < 10; i++)  {
-        gscoped_ptr<CBTreeIterator<BTreeTraits> > iter(
-          tree.NewIterator());
-        bool exact;
-        iter->SeekAtOrAfter(Slice(""), &exact);
-        int count = 0;
-        while (iter->IsValid()) {
-          count++;
-          iter->Next();
-        }
+        int64_t count = 0;
+        int64_t total_len = 0;
+        DoScan<false>(&tree, &count, &total_len);
+        ASSERT_EQ(count, n_keys);
+        ASSERT_GT(total_len, 0);
+      }
+    }
+
+    LOG_TIMING(INFO, StringPrintf("Scan %d keys %d times (%s, val-only)",
+                                  n_keys, scan_trials,
+                                  freeze ? "frozen" : "not frozen")) {
+      for (int i = 0; i < 10; i++)  {
+        int64_t count = 0;
+        int64_t total_len = 0;
+        DoScan<true>(&tree, &count, &total_len);
         ASSERT_EQ(count, n_keys);
+        ASSERT_GT(total_len, 0);
       }
     }
   }
@@ -804,7 +834,7 @@ TEST_F(TestCBTree, TestIteratorSeekAtOrBefore) {
   // and set exact = true;
   for (int i = 500; i <= key_num; ++i) {
     string key = Substitute("key$0", i * 2);
-    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(t.NewIterator());
+    unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(t.NewIterator());
     bool exact;
     ASSERT_TRUE(iter->SeekAtOrBefore(Slice(key), &exact));
     ASSERT_TRUE(exact);
@@ -817,7 +847,7 @@ TEST_F(TestCBTree, TestIteratorSeekAtOrBefore) {
 
   // Seek to before first key should fail.
   {
-    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(t.NewIterator());
+    unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(t.NewIterator());
 
     bool exact;
     ASSERT_FALSE(iter->SeekAtOrBefore(Slice("key0000"), &exact));
@@ -831,7 +861,7 @@ TEST_F(TestCBTree, TestIteratorSeekAtOrBefore) {
   for (int i = 500; i <= key_num - 1; ++i) {
     string key = Substitute("key$0", i * 2 + 1);
     string expect_key = Substitute("key$0", i * 2);
-    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(t.NewIterator());
+    unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(t.NewIterator());
     bool exact;
     ASSERT_TRUE(iter->SeekAtOrBefore(Slice(key), &exact));
     ASSERT_FALSE(exact);
@@ -845,7 +875,7 @@ TEST_F(TestCBTree, TestIteratorSeekAtOrBefore) {
   // Seek to after last key should successfully reach last key
   // and set exact = false;
   {
-    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(t.NewIterator());
+    unique_ptr<CBTreeIterator<SmallFanoutTraits>> iter(t.NewIterator());
 
     bool exact;
     ASSERT_TRUE(iter->SeekAtOrBefore(Slice("key2001"), &exact));
diff --git a/src/kudu/tablet/concurrent_btree.h 
b/src/kudu/tablet/concurrent_btree.h
index 5d321ab..4e919d4 100644
--- a/src/kudu/tablet/concurrent_btree.h
+++ b/src/kudu/tablet/concurrent_btree.h
@@ -300,6 +300,10 @@ class ValueSlice {
     ptr_ = const_cast<const uint8_t*>(in_arena);
   }
 
+  void prefetch() {
+    ::prefetch(reinterpret_cast<const char*>(ptr_), PREFETCH_HINT_T0);
+  }
+
  private:
   const uint8_t* ptr_;
 } PACKED;
@@ -751,6 +755,10 @@ class LeafNode : public NodeBase<Traits> {
     return keys_[idx].as_slice();
   }
 
+  ValueSlice GetValue(size_t idx) const {
+    return vals_[idx];
+  }
+
   // Get the slice corresponding to the nth key and value.
   //
   // If the caller does not hold the lock, then this Slice
@@ -763,8 +771,8 @@ class LeafNode : public NodeBase<Traits> {
   // The key, on the other hand, will always be a valid pointer, but
   // may be invalid data.
   void Get(size_t idx, Slice *k, ValueSlice *v) const {
-    *k = keys_[idx].as_slice();
-    *v = vals_[idx];
+    *k = GetKey(idx);
+    *v = GetValue(idx);
   }
 
   // Truncates the node, removing entries from the right to reduce
@@ -1652,6 +1660,14 @@ class CBTreeIterator {
     return leaf_to_scan_->GetKey(idx_in_leaf_);
   }
 
+
+  Slice GetCurrentValue() const {
+    DCHECK(seeked_);
+    ValueSlice val_slice = leaf_to_scan_->GetValue(idx_in_leaf_);
+    return val_slice.as_slice();
+  }
+
+
   ////////////////////////////////////////////////////////////
   // Advanced functions which expose some of the internal state
   // of the iterator, allowing for limited "rewind" capability
@@ -1802,6 +1818,9 @@ class CBTreeIterator {
     }
 #ifdef SCAN_PREFETCH
     PrefetchMemory(next->next_);
+    for (int i = 0; i < next->num_entries(); i++) {
+      next->vals_[i].prefetch();
+    }
 #endif
 
     // If the tree is frozen, we don't need to play optimistic concurrency
diff --git a/src/kudu/tablet/memrowset.cc b/src/kudu/tablet/memrowset.cc
index 58cabab..49d04b7 100644
--- a/src/kudu/tablet/memrowset.cc
+++ b/src/kudu/tablet/memrowset.cc
@@ -492,18 +492,17 @@ Status MemRowSet::Iterator::NextBlock(RowBlock *dst) {
 Status MemRowSet::Iterator::FetchRows(RowBlock* dst, size_t* fetched) {
   *fetched = 0;
   do {
-    Slice k, v;
     RowBlockRow dst_row = dst->row(*fetched);
 
     // Copy the row into the destination, including projection
     // and relocating slices.
-    // TODO: can we share some code here with CopyRowToArena() from row.h
+    // TODO(todd): can we share some code here with CopyRowToArena() from row.h
     // or otherwise put this elsewhere?
-    iter_->GetCurrentEntry(&k, &v);
+    Slice v = iter_->GetCurrentValue();
     MRSRow row(memrowset_.get(), v);
 
     // Short-circuit if we've exceeded the iteration's upper bound.
-    if (has_upper_bound() && out_of_bounds(k)) {
+    if (has_upper_bound() && out_of_bounds(iter_->GetCurrentKey())) {
       state_ = kFinished;
       break;
     }
diff --git a/src/kudu/tablet/memrowset.h b/src/kudu/tablet/memrowset.h
index dff340c..f632cc7 100644
--- a/src/kudu/tablet/memrowset.h
+++ b/src/kudu/tablet/memrowset.h
@@ -29,6 +29,7 @@
 #include <glog/logging.h>
 
 #include "kudu/common/iterator.h"
+#include "kudu/common/iterator_stats.h"
 #include "kudu/common/row.h"
 #include "kudu/common/rowid.h"
 #include "kudu/common/schema.h"
@@ -55,7 +56,6 @@ class RowBlock;
 class RowBlockRow;
 class RowChangeList;
 class ScanSpec;
-struct IteratorStats;
 
 namespace fs {
 struct IOContext;
@@ -507,10 +507,9 @@ class MemRowSet::Iterator : public RowwiseIterator {
 
   // NOTE: This method will return a MRSRow with the MemRowSet schema.
   //       The row is NOT projected using the schema specified to the iterator.
-  const MRSRow GetCurrentRow() const {
+  MRSRow GetCurrentRow() const {
     DCHECK_NE(state_, kUninitialized) << "not initted";
-    Slice dummy, mrsrow_data;
-    iter_->GetCurrentEntry(&dummy, &mrsrow_data);
+    Slice mrsrow_data = iter_->GetCurrentValue();
     return MRSRow(memrowset_.get(), mrsrow_data);
   }
 

Reply via email to