This is an automated email from the ASF dual-hosted git repository.

todd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 4fb09d686a4a06fd30ee9787b8b65de559893531
Author: Todd Lipcon <[email protected]>
AuthorDate: Wed Mar 25 12:06:32 2020 -0700

    rowblock: improve GetSelectedRows API
    
    The earlier commit used a boolean return value to indicate whether all
    rows were selected, but as I used the API more, I found the code
    somewhat confusing. This adds a wrapper class with appropriate getters
    and error checking.
    
    Change-Id: I34630c7bb854cca572ec081acbc05f3dac4db931
    Reviewed-on: http://gerrit.cloudera.org:8080/15559
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <[email protected]>
---
 src/kudu/common/rowblock-test.cc | 17 ++++++----
 src/kudu/common/rowblock.cc      | 23 ++++++++------
 src/kudu/common/rowblock.h       | 69 +++++++++++++++++++++++++++++++++++-----
 src/kudu/common/wire_protocol.cc | 13 ++------
 4 files changed, 89 insertions(+), 33 deletions(-)

diff --git a/src/kudu/common/rowblock-test.cc b/src/kudu/common/rowblock-test.cc
index f9e9aca..ca8ec94 100644
--- a/src/kudu/common/rowblock-test.cc
+++ b/src/kudu/common/rowblock-test.cc
@@ -60,16 +60,20 @@ TEST(TestSelectionVector, TestNonByteAligned) {
   ASSERT_EQ(sv.nrows(), sv.CountSelected());
   ASSERT_TRUE(sv.AnySelected());
 
-  vector<uint16_t> sel;
-  ASSERT_FALSE(sv.GetSelectedRows(&sel));
+  {
+    SelectedRows sel = sv.GetSelectedRows();
+    ASSERT_TRUE(sel.all_selected());
+    ASSERT_EQ(sv.nrows(), sel.num_selected());
+  }
 
   for (size_t i = 0; i < sv.nrows(); i++) {
     sv.SetRowUnselected(i);
   }
   ASSERT_EQ(0, sv.CountSelected());
   ASSERT_FALSE(sv.AnySelected());
-  ASSERT_TRUE(sv.GetSelectedRows(&sel));
-  ASSERT_EQ(0, sel.size());
+  SelectedRows sel = sv.GetSelectedRows();
+  ASSERT_EQ(0, sel.num_selected());
+  ASSERT_FALSE(sel.all_selected());
 }
 
 TEST(TestSelectionVector, TestGetSelectedRows) {
@@ -80,8 +84,9 @@ TEST(TestSelectionVector, TestGetSelectedRows) {
     sv.SetRowSelected(i);
   }
   vector<uint16_t> selected;
-  ASSERT_TRUE(sv.GetSelectedRows(&selected));
-  ASSERT_EQ(expected, selected);
+  SelectedRows sel = sv.GetSelectedRows();
+  ASSERT_FALSE(sel.all_selected());
+  ASSERT_EQ(expected, sel.indexes());
 }
 
 } // namespace kudu
diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc
index 74d1964..33fb220 100644
--- a/src/kudu/common/rowblock.cc
+++ b/src/kudu/common/rowblock.cc
@@ -17,6 +17,7 @@
 #include "kudu/common/rowblock.h"
 
 #include <limits>
+#include <numeric>
 #include <vector>
 
 #include <glog/logging.h>
@@ -86,23 +87,21 @@ static void GetSelectedRowsInternal(const uint8_t* 
__restrict__ bitmap,
                 });
 }
 
-bool SelectionVector::GetSelectedRows(vector<uint16_t>* selected) const {
+SelectedRows SelectionVector::GetSelectedRows() const {
   CHECK_LE(n_rows_, std::numeric_limits<uint16_t>::max());
   int n_selected = CountSelected();
   if (n_selected == n_rows_) {
-    selected->clear();
-    return false;
+    return SelectedRows(this);
   }
 
-  selected->resize(n_selected);
-  if (n_selected == 0) {
-    return true;
+  vector<uint16_t> selected;
+  if (n_selected > 0) {
+    selected.resize(n_selected);
+    GetSelectedRowsInternal(&bitmap_[0], n_bytes_, selected.data());
   }
-  GetSelectedRowsInternal(&bitmap_[0], n_bytes_, selected->data());
-  return true;
+  return SelectedRows(this, std::move(selected));
 }
 
-
 size_t SelectionVector::CountSelected() const {
   return Bits::Count(&bitmap_[0], n_bytes_);
 }
@@ -142,6 +141,12 @@ bool operator!=(const SelectionVector& a, const 
SelectionVector& b) {
   return !(a == b);
 }
 
+std::vector<uint16_t> SelectedRows::CreateRowIndexes() {
+  std::vector<uint16_t> ret(num_selected());
+  std::iota(ret.begin(), ret.end(), 0);
+  return ret;
+}
+
 //////////////////////////////
 // RowBlock
 //////////////////////////////
diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h
index cedc4a7..6e86e5f 100644
--- a/src/kudu/common/rowblock.h
+++ b/src/kudu/common/rowblock.h
@@ -20,6 +20,7 @@
 #include <cstring>
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include <glog/logging.h>
@@ -28,7 +29,6 @@
 #include "kudu/common/schema.h"
 #include "kudu/common/types.h"
 #include "kudu/gutil/macros.h"
-#include "kudu/gutil/port.h"
 #include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/util/bitmap.h"
 #include "kudu/util/status.h"
@@ -37,6 +37,7 @@ namespace kudu {
 
 class Arena;
 class RowBlockRow;
+class SelectedRows;
 
 // Bit-vector representing the selection status of each row in a row block.
 //
@@ -102,13 +103,10 @@ class SelectionVector {
     return BitmapFindFirstSet(&bitmap_[0], row_offset, n_rows_, row);
   }
 
-  // Sets '*selected' to the indices of all rows marked as selected
-  // in this selection vector.
-  //
-  // NOTE: in the case that all rows are selected, a fast path is triggered
-  // in which false is returned with an empty 'selected'. Otherwise, returns
-  // true.
-  bool GetSelectedRows(std::vector<uint16_t>* selected) const 
WARN_UNUSED_RESULT;
+  // Processes the selection vector to return a SelectedRows object which 
indicates
+  // the indices of the selected rows. The returned object refers to the memory
+  // of this SelectionVector and must not be retained longer than this 
instance.
+  SelectedRows GetSelectedRows() const;
 
   uint8_t *mutable_bitmap() {
     return &bitmap_[0];
@@ -227,6 +225,61 @@ class SelectionVectorView {
   size_t row_offset_;
 };
 
+// Result type for SelectionVector::GetSelectedRows.
+class SelectedRows {
+ public:
+  bool all_selected() const {
+    return all_selected_;
+  }
+
+  int num_selected() const {
+    return all_selected_ ? sel_vector_->nrows() : indexes_.size();
+  }
+
+  // Get the selected rows.
+  //
+  // NOTE: callers must check all_selected() first, and not use this
+  // function if all rows are selected. 'AsRowIndexes()' may be used instead.
+  const std::vector<uint16_t>& indexes() const {
+    DCHECK(!all_selected_);
+    return indexes_;
+  }
+
+  const uint8_t* bitmap() const {
+    return sel_vector_->bitmap();
+  }
+
+  // Convert this object to a simple vector of row indexes, materializing that
+  // vector in the case that all of the rows are selected.
+  std::vector<uint16_t> ToRowIndexes() && {
+    if (!all_selected_) {
+      return std::move(indexes_);
+    }
+    return CreateRowIndexes();
+  }
+
+ private:
+  friend class SelectionVector;
+
+  explicit SelectedRows(const SelectionVector* sel_vector)
+      : sel_vector_(sel_vector),
+        all_selected_(true) {
+  }
+  explicit SelectedRows(const SelectionVector* sel_vector,
+                        std::vector<uint16_t> indexes)
+      : sel_vector_(sel_vector),
+        all_selected_(false),
+        indexes_(std::move(indexes)) {
+  }
+
+  std::vector<uint16_t> CreateRowIndexes();
+
+  const SelectionVector* const sel_vector_;
+  const bool all_selected_;
+  std::vector<uint16_t> indexes_;
+};
+
+
 // A block of decoded rows.
 // Wrapper around a buffer, which keeps the buffer's size, associated arena,
 // and schema. Provides convenience accessors for indexing by row, column, etc.
diff --git a/src/kudu/common/wire_protocol.cc b/src/kudu/common/wire_protocol.cc
index 5b2bc3f..ef14d8b 100644
--- a/src/kudu/common/wire_protocol.cc
+++ b/src/kudu/common/wire_protocol.cc
@@ -22,7 +22,6 @@
 #include <algorithm>
 #include <cstdint>
 #include <cstring>
-#include <numeric>
 #include <ostream>
 #include <string>
 #include <vector>
@@ -935,15 +934,9 @@ int SerializeRowBlock(const RowBlock& block,
                       bool pad_unixtime_micros_to_16_bytes) {
   DCHECK_GT(block.nrows(), 0);
 
-  vector<uint16_t> selected_row_indexes;
-  bool all_selected = !block.selection_vector()->GetSelectedRows(
-      &selected_row_indexes);
-  if (all_selected) {
-    // TODO(todd): add a fast-path for this in the 'Copy' functions.
-    selected_row_indexes.resize(block.nrows());
-    std::iota(selected_row_indexes.begin(),
-              selected_row_indexes.end(), 0);
-  }
+  vector<uint16_t> selected_row_indexes =
+      block.selection_vector()->GetSelectedRows().ToRowIndexes();
+
   size_t num_rows = selected_row_indexes.size();
 
   // Fast-path empty blocks (eg because the predicate didn't match any rows or

Reply via email to