This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 86536167a0a [Improvement](segment iterator) Optimize column row reservation to reduce overhead (#42060)
86536167a0a is described below

commit 86536167a0a099d53c1a787f167905dcfa049bae
Author: airborne12 <[email protected]>
AuthorDate: Tue Oct 22 18:51:09 2024 +0800

    [Improvement](segment iterator) Optimize column row reservation to reduce overhead (#42060)
    
    ## Proposed changes
    
    This PR improves the segment iterator by reducing the number of rows
    reserved per column when the row bitmap cardinality is smaller than
    block_row_max. This improves memory efficiency and performance by
    avoiding unnecessarily large row allocations.
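    
    As a minimal standalone sketch of the idea (hypothetical names:
    `row_bitmap_cardinality` and `block_row_max` stand in for
    `_row_bitmap.cardinality()` and `_opts.block_row_max` in
    segment_iterator.cpp):
    
    ```cpp
    #include <algorithm>
    #include <cstdint>
    #include <vector>
    
    // Reserve no more rows than the filter bitmap can actually select.
    uint64_t clamp_reserve_rows(uint64_t row_bitmap_cardinality,
                                uint32_t block_row_max) {
        return std::min(row_bitmap_cardinality, uint64_t(block_row_max));
    }
    
    int main() {
        std::vector<int32_t> column;
        // E.g. a filter leaves 37 candidate rows while the block limit is
        // 4096: reserving 37 instead of 4096 avoids a needlessly large
        // allocation.
        column.reserve(clamp_reserve_rows(/*row_bitmap_cardinality=*/37,
                                          /*block_row_max=*/4096));
        return 0;
    }
    ```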
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index a921674a1e5..985cdc16e68 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1989,6 +1989,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
     if (UNLIKELY(!_lazy_inited)) {
         RETURN_IF_ERROR(_lazy_init());
         _lazy_inited = true;
+        // If the row bitmap size is smaller than block_row_max, there's no need to reserve that many column rows.
+        auto nrows_reserve_limit =
+                std::min(_row_bitmap.cardinality(), uint64_t(_opts.block_row_max));
         if (_lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval) {
             _block_rowids.resize(_opts.block_row_max);
         }
@@ -2013,7 +2016,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
                                 storage_column_type->is_nullable(), _opts.io_ctx.reader_type));
                 _current_return_columns[cid]->set_rowset_segment_id(
                         {_segment->rowset_id(), _segment->id()});
-                _current_return_columns[cid]->reserve(_opts.block_row_max);
+                _current_return_columns[cid]->reserve(nrows_reserve_limit);
             } else if (i >= block->columns()) {
                 // if i >= block->columns means the column and not the pred_column means `column i` is
                 // a delete condition column. but the column is not effective in the segment. so we just
@@ -2024,7 +2027,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
                 // TODO: skip read the not effective delete column to speed up segment read.
                 _current_return_columns[cid] =
                         Schema::get_data_type_ptr(*column_desc)->create_column();
-                _current_return_columns[cid]->reserve(_opts.block_row_max);
+                _current_return_columns[cid]->reserve(nrows_reserve_limit);
             }
         }
 
@@ -2049,7 +2052,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
     if (_can_opt_topn_reads()) {
         nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit);
     }
-
+    // If the row bitmap size is smaller than nrows_read_limit, there's no need to reserve that many column rows.
+    nrows_read_limit = std::min(_row_bitmap.cardinality(), uint64_t(nrows_read_limit));
     DBUG_EXECUTE_IF("segment_iterator.topn_opt_1", {
         if (nrows_read_limit != 1) {
             return Status::Error<ErrorCode::INTERNAL_ERROR>("topn opt 1 execute failed: {}",


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
