This is an automated email from the ASF dual-hosted git repository. todd pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 4fb09d686a4a06fd30ee9787b8b65de559893531 Author: Todd Lipcon <[email protected]> AuthorDate: Wed Mar 25 12:06:32 2020 -0700 rowblock: improve GetSelectedRows API The earlier commit used a boolean return value to indicate whether all rows were selected, but as I used the API more, I found the code somewhat confusing. This adds a wrapper class with appropriate getters and error checking. Change-Id: I34630c7bb854cca572ec081acbc05f3dac4db931 Reviewed-on: http://gerrit.cloudera.org:8080/15559 Tested-by: Kudu Jenkins Reviewed-by: Andrew Wong <[email protected]> --- src/kudu/common/rowblock-test.cc | 17 ++++++---- src/kudu/common/rowblock.cc | 23 ++++++++------ src/kudu/common/rowblock.h | 69 +++++++++++++++++++++++++++++++++++----- src/kudu/common/wire_protocol.cc | 13 ++------ 4 files changed, 89 insertions(+), 33 deletions(-) diff --git a/src/kudu/common/rowblock-test.cc b/src/kudu/common/rowblock-test.cc index f9e9aca..ca8ec94 100644 --- a/src/kudu/common/rowblock-test.cc +++ b/src/kudu/common/rowblock-test.cc @@ -60,16 +60,20 @@ TEST(TestSelectionVector, TestNonByteAligned) { ASSERT_EQ(sv.nrows(), sv.CountSelected()); ASSERT_TRUE(sv.AnySelected()); - vector<uint16_t> sel; - ASSERT_FALSE(sv.GetSelectedRows(&sel)); + { + SelectedRows sel = sv.GetSelectedRows(); + ASSERT_TRUE(sel.all_selected()); + ASSERT_EQ(sv.nrows(), sel.num_selected()); + } for (size_t i = 0; i < sv.nrows(); i++) { sv.SetRowUnselected(i); } ASSERT_EQ(0, sv.CountSelected()); ASSERT_FALSE(sv.AnySelected()); - ASSERT_TRUE(sv.GetSelectedRows(&sel)); - ASSERT_EQ(0, sel.size()); + SelectedRows sel = sv.GetSelectedRows(); + ASSERT_EQ(0, sel.num_selected()); + ASSERT_FALSE(sel.all_selected()); } TEST(TestSelectionVector, TestGetSelectedRows) { @@ -80,8 +84,9 @@ TEST(TestSelectionVector, TestGetSelectedRows) { sv.SetRowSelected(i); } vector<uint16_t> selected; - ASSERT_TRUE(sv.GetSelectedRows(&selected)); - ASSERT_EQ(expected, selected); + SelectedRows sel = sv.GetSelectedRows(); + ASSERT_FALSE(sel.all_selected()); + ASSERT_EQ(expected, sel.indexes()); } } // namespace kudu diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc index 74d1964..33fb220 100644 --- a/src/kudu/common/rowblock.cc +++ b/src/kudu/common/rowblock.cc @@ -17,6 +17,7 @@ #include "kudu/common/rowblock.h" #include <limits> +#include <numeric> #include <vector> #include <glog/logging.h> @@ -86,23 +87,21 @@ static void GetSelectedRowsInternal(const uint8_t* __restrict__ bitmap, }); } -bool SelectionVector::GetSelectedRows(vector<uint16_t>* selected) const { +SelectedRows SelectionVector::GetSelectedRows() const { CHECK_LE(n_rows_, std::numeric_limits<uint16_t>::max()); int n_selected = CountSelected(); if (n_selected == n_rows_) { - selected->clear(); - return false; + return SelectedRows(this); } - selected->resize(n_selected); - if (n_selected == 0) { - return true; + vector<uint16_t> selected; + if (n_selected > 0) { + selected.resize(n_selected); + GetSelectedRowsInternal(&bitmap_[0], n_bytes_, selected.data()); } - GetSelectedRowsInternal(&bitmap_[0], n_bytes_, selected->data()); - return true; + return SelectedRows(this, std::move(selected)); } - size_t SelectionVector::CountSelected() const { return Bits::Count(&bitmap_[0], n_bytes_); } @@ -142,6 +141,12 @@ bool operator!=(const SelectionVector& a, const SelectionVector& b) { return !(a == b); } +std::vector<uint16_t> SelectedRows::CreateRowIndexes() { + std::vector<uint16_t> ret(num_selected()); + std::iota(ret.begin(), ret.end(), 0); + return ret; +} + ////////////////////////////// // RowBlock ////////////////////////////// diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h index cedc4a7..6e86e5f 100644 --- a/src/kudu/common/rowblock.h +++ b/src/kudu/common/rowblock.h @@ -20,6 +20,7 @@ #include <cstring> #include <memory> #include <string> +#include <utility> #include <vector> #include <glog/logging.h> @@ -28,7 +29,6 @@ #include "kudu/common/schema.h" #include "kudu/common/types.h" #include "kudu/gutil/macros.h" -#include "kudu/gutil/port.h" #include "kudu/gutil/strings/stringpiece.h" #include "kudu/util/bitmap.h" #include "kudu/util/status.h" @@ -37,6 +37,7 @@ namespace kudu { class Arena; class RowBlockRow; +class SelectedRows; // Bit-vector representing the selection status of each row in a row block. // @@ -102,13 +103,10 @@ class SelectionVector { return BitmapFindFirstSet(&bitmap_[0], row_offset, n_rows_, row); } - // Sets '*selected' to the indices of all rows marked as selected - // in this selection vector. - // - // NOTE: in the case that all rows are selected, a fast path is triggered - // in which false is returned with an empty 'selected'. Otherwise, returns - // true. - bool GetSelectedRows(std::vector<uint16_t>* selected) const WARN_UNUSED_RESULT; + // Processes the selection vector to return a SelectedRows object which indicates + // the indices of the selected rows. The returned object refers to the memory + // of this SelectionVector and must not be retained longer than this instance. + SelectedRows GetSelectedRows() const; uint8_t *mutable_bitmap() { return &bitmap_[0]; @@ -227,6 +225,61 @@ class SelectionVectorView { size_t row_offset_; }; +// Result type for SelectionVector::GetSelectedRows. +class SelectedRows { + public: + bool all_selected() const { + return all_selected_; + } + + int num_selected() const { + return all_selected_ ? sel_vector_->nrows() : indexes_.size(); + } + + // Get the selected rows. + // + // NOTE: callers must check all_selected() first, and not use this + // function if all rows are selected. 'AsRowIndexes()' may be used instead. + const std::vector<uint16_t>& indexes() const { + DCHECK(!all_selected_); + return indexes_; + } + + const uint8_t* bitmap() const { + return sel_vector_->bitmap(); + } + + // Convert this object to a simple vector of row indexes, materializing that + // vector in the case that all of the rows are selected. + std::vector<uint16_t> ToRowIndexes() && { + if (!all_selected_) { + return std::move(indexes_); + } + return CreateRowIndexes(); + } + + private: + friend class SelectionVector; + + explicit SelectedRows(const SelectionVector* sel_vector) + : sel_vector_(sel_vector), + all_selected_(true) { + } + explicit SelectedRows(const SelectionVector* sel_vector, + std::vector<uint16_t> indexes) + : sel_vector_(sel_vector), + all_selected_(false), + indexes_(std::move(indexes)) { + } + + std::vector<uint16_t> CreateRowIndexes(); + + const SelectionVector* const sel_vector_; + const bool all_selected_; + std::vector<uint16_t> indexes_; +}; + + // A block of decoded rows. // Wrapper around a buffer, which keeps the buffer's size, associated arena, // and schema. Provides convenience accessors for indexing by row, column, etc. diff --git a/src/kudu/common/wire_protocol.cc b/src/kudu/common/wire_protocol.cc index 5b2bc3f..ef14d8b 100644 --- a/src/kudu/common/wire_protocol.cc +++ b/src/kudu/common/wire_protocol.cc @@ -22,7 +22,6 @@ #include <algorithm> #include <cstdint> #include <cstring> -#include <numeric> #include <ostream> #include <string> #include <vector> @@ -935,15 +934,9 @@ int SerializeRowBlock(const RowBlock& block, bool pad_unixtime_micros_to_16_bytes) { DCHECK_GT(block.nrows(), 0); - vector<uint16_t> selected_row_indexes; - bool all_selected = !block.selection_vector()->GetSelectedRows( - &selected_row_indexes); - if (all_selected) { - // TODO(todd): add a fast-path for this in the 'Copy' functions. - selected_row_indexes.resize(block.nrows()); - std::iota(selected_row_indexes.begin(), - selected_row_indexes.end(), 0); - } + vector<uint16_t> selected_row_indexes = + block.selection_vector()->GetSelectedRows().ToRowIndexes(); + size_t num_rows = selected_row_indexes.size(); // Fast-path empty blocks (eg because the predicate didn't match any rows or
