This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 3316848c2 [common] minor cleanup on RowBlock/RowBlockRow
3316848c2 is described below

commit 3316848c259804c7e90f3a3b7a21b62db1c51249
Author: Alexey Serbin <[email protected]>
AuthorDate: Tue Feb 4 13:01:17 2025 -0800

    [common] minor cleanup on RowBlock/RowBlockRow
    
    This patch addresses a few const-correctness, CHECK/DCHECK and
    code style issues in RowBlock/RowBlockRow/SelectionVector classes.
    
    Since I have patches with updates in the related code, I'm publishing
    this small clean-up in a separate patch.
    
    This patch doesn't contain any functional modifications.
    
    Change-Id: If9eda1ed495875bd77babbf3523bb206dd30e8d5
    Reviewed-on: http://gerrit.cloudera.org:8080/22450
    Reviewed-by: Yifan Zhang <[email protected]>
    Tested-by: Alexey Serbin <[email protected]>
---
 src/kudu/common/columnblock.h |   1 +
 src/kudu/common/rowblock.cc   |  58 +++++++++++------------
 src/kudu/common/rowblock.h    | 108 +++++++++++++++++++++++-------------------
 3 files changed, 87 insertions(+), 80 deletions(-)

diff --git a/src/kudu/common/columnblock.h b/src/kudu/common/columnblock.h
index 51881cb11..7130e94df 100644
--- a/src/kudu/common/columnblock.h
+++ b/src/kudu/common/columnblock.h
@@ -157,6 +157,7 @@ class ColumnBlock {
  protected:
   friend class ColumnBlockCell;
   friend class ColumnDataView;
+  friend class RowBlockRow;
 
   // Return a pointer to the given cell.
   uint8_t* mutable_cell_ptr(size_t idx) {
diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc
index 6271df0e6..328efb33c 100644
--- a/src/kudu/common/rowblock.cc
+++ b/src/kudu/common/rowblock.cc
@@ -32,11 +32,11 @@ using std::vector;
 namespace kudu {
 
 SelectionVector::SelectionVector(size_t row_capacity)
-  : bytes_capacity_(BitmapSize(row_capacity)),
-    n_rows_(row_capacity),
-    n_bytes_(bytes_capacity_),
-    bitmap_(new uint8_t[n_bytes_]) {
-  CHECK_GT(n_bytes_, 0);
+    : bytes_capacity_(BitmapSize(row_capacity)),
+      n_rows_(row_capacity),
+      n_bytes_(bytes_capacity_),
+      bitmap_(new uint8_t[n_bytes_]) {
+  DCHECK_GT(n_bytes_, 0);
   PadExtraBitsWithZeroes();
 }
 
@@ -45,7 +45,7 @@ void SelectionVector::Resize(size_t n_rows) {
     return;
   }
 
-  size_t new_bytes = BitmapSize(n_rows);
+  const size_t new_bytes = BitmapSize(n_rows);
   CHECK_LE(new_bytes, bytes_capacity_);
   n_rows_ = n_rows;
   n_bytes_ = new_bytes;
@@ -76,7 +76,6 @@ void SelectionVector::ClearToSelectAtMost(size_t max_rows) {
   }
 }
 
-
 template<bool BMI>
 static void GetSelectedRowsInternal(const uint8_t* __restrict__ bitmap,
                                     int n_bytes,
@@ -87,8 +86,6 @@ static void GetSelectedRowsInternal(const uint8_t* 
__restrict__ bitmap,
                 });
 }
 
-static const bool kHasBmi = base::CPU().has_bmi();
-
 #ifdef __x86_64__
 // Explicit instantiation with the BMI instruction set enabled, which
 // makes this slightly faster.
@@ -100,14 +97,16 @@ void GetSelectedRowsInternal<true>(const uint8_t* 
__restrict__ bitmap,
 #endif
 
 SelectedRows SelectionVector::GetSelectedRows() const {
-  CHECK_LE(n_rows_, std::numeric_limits<uint16_t>::max());
-  int n_selected = CountSelected();
+  DCHECK_LE(n_rows_, std::numeric_limits<uint16_t>::max());
+
+  size_t n_selected = CountSelected();
   if (n_selected == n_rows_) {
     return SelectedRows(this);
   }
 
   vector<uint16_t> selected(n_selected > 0 ? n_selected : 0);
   if (n_selected > 0) {
+    static const bool kHasBmi = base::CPU().has_bmi();
     if (kHasBmi) {
       GetSelectedRowsInternal<true>(&bitmap_[0], n_bytes_, selected.data());
     } else {
@@ -123,8 +122,7 @@ size_t SelectionVector::CountSelected() const {
 
 bool SelectionVector::AnySelected() const {
   size_t rem = n_bytes_;
-  const uint32_t *p32 = reinterpret_cast<const uint32_t *>(
-    &bitmap_[0]);
+  const uint32_t* p32 = reinterpret_cast<const uint32_t*>(&bitmap_[0]);
   while (rem >= 4) {
     if (*p32 != 0) {
       return true;
@@ -133,7 +131,7 @@ bool SelectionVector::AnySelected() const {
     rem -= 4;
   }
 
-  const uint8_t *p8 = reinterpret_cast<const uint8_t *>(p32);
+  const uint8_t* p8 = reinterpret_cast<const uint8_t*>(p32);
   while (rem > 0) {
     if (*p8 != 0) {
       return true;
@@ -166,18 +164,18 @@ std::vector<uint16_t> SelectedRows::CreateRowIndexes() {
 // RowBlock
 //////////////////////////////
 RowBlock::RowBlock(const Schema* schema,
-                   size_t nrows,
+                   size_t nrows_capacity,
                    RowBlockMemory* memory)
-  : schema_(schema),
-    columns_data_(schema->num_columns()),
-    column_non_null_bitmaps_(schema->num_columns()),
-    row_capacity_(nrows),
-    nrows_(nrows),
-    memory_(memory),
-    sel_vec_(nrows) {
-  CHECK_GT(row_capacity_, 0);
-
-  size_t bitmap_size = BitmapSize(row_capacity_);
+    : schema_(schema),
+      row_capacity_(nrows_capacity),
+      columns_data_(schema->num_columns()),
+      column_non_null_bitmaps_(schema->num_columns()),
+      nrows_(row_capacity_),
+      memory_(memory),
+      sel_vec_(row_capacity_) {
+  DCHECK_GT(row_capacity_, 0);
+
+  const size_t bitmap_size = BitmapSize(row_capacity_);
   for (size_t i = 0; i < schema->num_columns(); ++i) {
     const ColumnSchema& col_schema = schema->column(i);
     size_t col_size = row_capacity_ * col_schema.type_info()->size();
@@ -198,14 +196,14 @@ RowBlock::~RowBlock() {
   }
 }
 
-void RowBlock::Resize(size_t n_rows) {
-  if (PREDICT_FALSE(n_rows == nrows_)) {
+void RowBlock::Resize(size_t nrows) {
+  if (PREDICT_FALSE(nrows == nrows_)) {
     return;
   }
 
-  CHECK_LE(n_rows, row_capacity_);
-  nrows_ = n_rows;
-  sel_vec_.Resize(n_rows);
+  CHECK_LE(nrows, row_capacity_);
+  nrows_ = nrows;
+  sel_vec_.Resize(nrows);
 }
 
 } // namespace kudu
diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h
index d1f3f9178..42edfb877 100644
--- a/src/kudu/common/rowblock.h
+++ b/src/kudu/common/rowblock.h
@@ -40,11 +40,15 @@ class Arena;
 class RowBlockRow;
 class SelectedRows;
 
+namespace tablet {
+class TestDeltaFile;
+}
+
 // Bit-vector representing the selection status of each row in a row block.
 //
 // When scanning through data, a 1 bit in the selection vector indicates that
 // the given row is live and has passed all predicates.
-class SelectionVector {
+class SelectionVector final {
  public:
   // Construct a new vector. The bits are initially in an indeterminate state.
   // Call SetAllTrue() if you require all rows to be initially selected.
@@ -58,7 +62,7 @@ class SelectionVector {
   //
   // The underlying bytes must not be deallocated or else this object will 
become
   // invalid.
-  SelectionVector(SelectionVector *other, size_t prefix_rows);
+  SelectionVector(SelectionVector* other, size_t prefix_rows);
 
   // Resize the selection vector to the given number of rows.
   // This size must be <= the allocated capacity.
@@ -109,11 +113,11 @@ class SelectionVector {
   // of this SelectionVector and must not be retained longer than this 
instance.
   SelectedRows GetSelectedRows() const;
 
-  uint8_t *mutable_bitmap() {
+  uint8_t* mutable_bitmap() {
     return &bitmap_[0];
   }
 
-  const uint8_t *bitmap() const {
+  const uint8_t* bitmap() const {
     return &bitmap_[0];
   }
 
@@ -175,7 +179,7 @@ class SelectionVector {
   }
 
   // The number of allocated bytes in bitmap_
-  size_t bytes_capacity_;
+  const size_t bytes_capacity_;
 
   size_t n_rows_;
   size_t n_bytes_;
@@ -192,33 +196,33 @@ bool operator!=(const SelectionVector& a, const 
SelectionVector& b);
 // batch will start from. After processing a batch, Advance() should be called
 // and the view will move forward by the appropriate amount. In this way, the
 // underlying selection vector can easily be updated batch-by-batch.
-class SelectionVectorView {
+class SelectionVectorView final {
  public:
   // Constructs a new SelectionVectorView.
   //
   // The 'sel_vec' object must outlive this SelectionVectorView.
-  explicit SelectionVectorView(SelectionVector *sel_vec)
-    : sel_vec_(sel_vec), row_offset_(0) {
+  explicit SelectionVectorView(SelectionVector* sel_vec)
+      : sel_vec_(sel_vec), row_offset_(0) {
   }
   void Advance(size_t skip) {
-    DCHECK_LE(skip, sel_vec_->nrows() - row_offset_);
+    DCHECK_LE(skip + row_offset_, sel_vec_->nrows());
     row_offset_ += skip;
   }
   void SetBit(size_t row_idx) {
-    DCHECK_LE(row_idx, sel_vec_->nrows() - row_offset_);
+    DCHECK_LE(row_idx + row_offset_, sel_vec_->nrows());
     BitmapSet(sel_vec_->mutable_bitmap(), row_offset_ + row_idx);
   }
   void ClearBit(size_t row_idx) {
-    DCHECK_LE(row_idx, sel_vec_->nrows() - row_offset_);
+    DCHECK_LE(row_idx + row_offset_, sel_vec_->nrows());
     BitmapClear(sel_vec_->mutable_bitmap(), row_offset_ + row_idx);
   }
-  bool TestBit(size_t row_idx) {
-    DCHECK_LE(row_idx, sel_vec_->nrows() - row_offset_);
+  bool TestBit(size_t row_idx) const {
+    DCHECK_LE(row_idx + row_offset_, sel_vec_->nrows());
     return BitmapTest(sel_vec_->bitmap(), row_offset_ + row_idx);
   }
   // Clear "nrows" bits from the supplied "offset" in the current view.
   void ClearBits(size_t nrows, size_t offset = 0) {
-    DCHECK_LE(offset + nrows, sel_vec_->nrows() - row_offset_);
+    DCHECK_LE(offset + row_offset_ + nrows, sel_vec_->nrows());
     BitmapChangeBits(sel_vec_->mutable_bitmap(), row_offset_ + offset, nrows, 
false);
   }
  private:
@@ -227,7 +231,7 @@ class SelectionVectorView {
 };
 
 // Result type for SelectionVector::GetSelectedRows.
-class SelectedRows {
+class SelectedRows final {
  public:
   bool all_selected() const {
     return all_selected_;
@@ -305,22 +309,22 @@ class SelectedRows {
 // Typically in C++ this is done with separate wrapper classes for const vs 
non-const
 // referred-to data, but that would require a lot of duplication elsewhere in 
the code,
 // so for now, we just use convention: if you have a RowBlock or ColumnBlock 
parameter
-// that you expect to be modifying, use a "RowBlock *param". Otherwise, use a
+// that you expect to be modifying, use a "RowBlock* param". Otherwise, use a
 // "const RowBlock& param". Just because you _could_ modify the referred-to 
contents
 // of the latter doesn't mean you _should_.
-class RowBlock {
+class RowBlock final {
  public:
   // Constructs a new RowBlock.
   //
   // The 'schema' and 'arena' objects must outlive this RowBlock.
-  RowBlock(const Schema* schema, size_t nrows, RowBlockMemory* memory);
+  RowBlock(const Schema* schema, size_t nrows_capacity, RowBlockMemory* 
memory);
   ~RowBlock();
 
   // Resize the block to the given number of rows.
   // This size must be <= the the allocated capacity row_capacity().
   //
   // Ensures that all rows for indices < n_rows are unmodified.
-  void Resize(size_t n_rows);
+  void Resize(size_t nrows);
 
   size_t row_capacity() const {
     return row_capacity_;
@@ -359,22 +363,6 @@ class RowBlock {
   // rows which were filtered out by the selection vector.
   size_t nrows() const { return nrows_; }
 
-  // Zero the memory pointed to by this row block.
-  // This physically zeros the memory, so is not efficient - mostly useful
-  // from unit tests.
-  void ZeroMemory() {
-    size_t bitmap_size = BitmapSize(row_capacity_);
-    for (size_t i = 0; i < schema_->num_columns(); ++i) {
-      const ColumnSchema& col_schema = schema_->column(i);
-      size_t col_size = col_schema.type_info()->size() * row_capacity_;
-      memset(columns_data_[i], '\0', col_size);
-
-      if (column_non_null_bitmaps_[i] != NULL) {
-        memset(column_non_null_bitmaps_[i], '\0', bitmap_size);
-      }
-    }
-  }
-
   // Return the selection vector which indicates which rows have passed
   // predicates so far during evaluation of this block of rows.
   //
@@ -420,27 +408,45 @@ class RowBlock {
   }
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(RowBlock);
+  friend class tablet::TestDeltaFile;
 
   static size_t RowBlockSize(const Schema& schema, size_t nrows) {
+    const size_t bitmap_size = BitmapSize(nrows);
     size_t block_size = schema.num_columns() * sizeof(size_t);
-    size_t bitmap_size = BitmapSize(nrows);
-    for (size_t col = 0; col < schema.num_columns(); col++) {
-      const ColumnSchema& col_schema = schema.column(col);
+    for (const auto& col_schema : schema.columns()) {
       block_size += nrows * col_schema.type_info()->size();
-      if (col_schema.is_nullable())
+      if (col_schema.is_nullable()) {
         block_size += bitmap_size;
+      }
     }
     return block_size;
   }
 
+  // Zero the memory pointed to by this row block.
+  // This physically zeros the memory, so is not efficient - mostly useful
+  // from unit tests.
+  void ZeroMemory() {
+    const size_t bitmap_size = BitmapSize(row_capacity_);
+    for (size_t i = 0; i < schema_->num_columns(); ++i) {
+      const ColumnSchema& col_schema = schema_->column(i);
+      size_t col_size = col_schema.type_info()->size() * row_capacity_;
+      memset(columns_data_[i], '\0', col_size);
+
+      if (column_non_null_bitmaps_[i] != nullptr) {
+        memset(column_non_null_bitmaps_[i], '\0', bitmap_size);
+      }
+    }
+  }
+
+  // The schema for the rows in this RowBlock.
   const Schema* schema_;
+
+  // The maximum number of rows that can be stored in the allocated buffer.
+  const size_t row_capacity_;
+
   std::vector<uint8_t*> columns_data_;
   std::vector<uint8_t*> column_non_null_bitmaps_;
 
-  // The maximum number of rows that can be stored in our allocated buffer.
-  size_t row_capacity_;
-
   // The number of rows currently being processed in this block.
   // nrows_ <= row_capacity_
   size_t nrows_;
@@ -451,15 +457,17 @@ class RowBlock {
   // Deleted rows or rows which have failed to pass predicates will be zeroed
   // in the bitmap, and thus not returned to the end user.
   SelectionVector sel_vec_;
+
+  DISALLOW_COPY_AND_ASSIGN(RowBlock);
 };
 
 // Provides an abstraction to interact with a RowBlock row.
 //  Example usage:
 //    RowBlock row_block(schema, 10, NULL);
 //    RowBlockRow row = row_block.row(5);       // Get row 5
-//    void *cell_data = row.cell_ptr(3);        // Get column 3 of row 5
+//    void* cell_data = row.cell_ptr(3);        // Get column 3 of row 5
 //    ...
-class RowBlockRow {
+class RowBlockRow final {
  public:
   typedef ColumnBlock::Cell Cell;
 
@@ -468,7 +476,7 @@ class RowBlockRow {
   // The 'row_block' object must outlive this RowBlockRow.
   explicit RowBlockRow(const RowBlock* row_block = nullptr,
                        size_t row_index = 0)
-    : row_block_(row_block), row_index_(row_index) {
+      : row_block_(row_block), row_index_(row_index) {
   }
 
   RowBlockRow* Reset(const RowBlock* row_block, size_t row_index) {
@@ -493,14 +501,14 @@ class RowBlockRow {
     return column_block(col_idx).is_null(row_index_);
   }
 
-  uint8_t* mutable_cell_ptr(size_t col_idx) const {
-    return const_cast<uint8_t*>(cell_ptr(col_idx));
-  }
-
   const uint8_t* cell_ptr(size_t col_idx) const {
     return column_block(col_idx).cell_ptr(row_index_);
   }
 
+  uint8_t* mutable_cell_ptr(size_t col_idx) {
+    return column_block(col_idx).mutable_cell_ptr(row_index_);
+  }
+
   const uint8_t* nullable_cell_ptr(size_t col_idx) const {
     return column_block(col_idx).nullable_cell_ptr(row_index_);
   }

Reply via email to