This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 1c85db2c215 [opt](inverted index) topn opt reads only limit number of records #33163 (#33602)
1c85db2c215 is described below

commit 1c85db2c2151401fea4e6d8f6f323be3aed36804
Author: zzzxl <[email protected]>
AuthorDate: Thu Apr 18 08:56:07 2024 +0800

    [opt](inverted index) topn opt reads only limit number of records #33163 (#33602)
---
 be/src/olap/iterators.h                            |  1 +
 be/src/olap/rowset/beta_rowset_reader.cpp          |  1 +
 be/src/olap/rowset/beta_rowset_reader.h            |  3 +++
 be/src/olap/rowset/rowset_reader.h                 |  2 ++
 be/src/olap/rowset/rowset_reader_context.h         |  1 +
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 19 +++++++++++++++++++
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  2 ++
 be/src/vec/olap/vcollect_iterator.cpp              |  1 +
 8 files changed, 30 insertions(+)

diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index 3b9d205e83d..68ebc98570c 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -110,6 +110,7 @@ public:
     RowsetId rowset_id;
     Version version;
     int32_t tablet_id = 0;
+    size_t topn_limit = 0;
 };
 
 class RowwiseIterator;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index bb11347990c..1394e3a006f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -101,6 +101,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
     _read_options.rowset_id = _rowset->rowset_id();
     _read_options.version = _rowset->version();
     _read_options.tablet_id = _rowset->rowset_meta()->tablet_id();
+    _read_options.topn_limit = _topn_limit;
     if (_read_context->lower_bound_keys != nullptr) {
         for (int i = 0; i < _read_context->lower_bound_keys->size(); ++i) {
             _read_options.key_ranges.emplace_back(&_read_context->lower_bound_keys->at(i),
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index 2a9a5cf8072..db0b1facc73 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -84,6 +84,8 @@ public:
 
     RowsetReaderSharedPtr clone() override;
 
+    void set_topn_limit(size_t topn_limit) override { _topn_limit = topn_limit; }
+
 private:
     Status _init_iterator_once();
     Status _init_iterator();
@@ -122,6 +124,7 @@ private:
     StorageReadOptions _read_options;
 
     bool _empty = false;
+    size_t _topn_limit = 0;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h
index 4b3791da917..34caa615c25 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -88,6 +88,8 @@ public:
     virtual bool update_profile(RuntimeProfile* profile) = 0;
 
     virtual RowsetReaderSharedPtr clone() = 0;
+
+    virtual void set_topn_limit(size_t topn_limit) = 0;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h
index d1b6253b13a..2b75de869d7 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -80,6 +80,7 @@ struct RowsetReaderContext {
     bool is_key_column_group = false;
     const std::set<int32_t>* output_columns = nullptr;
     RowsetId rowset_id;
+    size_t topn_limit = 0;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 28311f316a1..cbdd6ae2631 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -2070,6 +2070,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
 
     _current_batch_rows_read = 0;
     uint32_t nrows_read_limit = _opts.block_row_max;
+    if (_can_opt_topn_reads()) {
+        nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit);
+    }
     RETURN_IF_ERROR(_read_columns_by_index(
             nrows_read_limit, _current_batch_rows_read,
             _lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval));
@@ -2511,5 +2514,21 @@ bool SegmentIterator::_has_delete_predicate(ColumnId cid) {
     return delete_columns_set.contains(cid);
 }
 
+bool SegmentIterator::_can_opt_topn_reads() const {
+    if (_opts.topn_limit <= 0) {
+        return false;
+    }
+
+    if (_opts.delete_condition_predicates->num_of_column_predicate() > 0) {
+        return false;
+    }
+
+    if (!_col_predicates.empty() || !_col_preds_except_leafnode_of_andnode.empty()) {
+        return false;
+    }
+
+    return true;
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 5d32367f94d..925f669e424 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -327,6 +327,8 @@ private:
 
     bool _has_delete_predicate(ColumnId cid);
 
+    bool _can_opt_topn_reads() const;
+
     class BitmapRangeIterator;
     class BackwardBitmapRangeIterator;
 
diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp
index 6112f30a47f..cf71826b40e 100644
--- a/be/src/vec/olap/vcollect_iterator.cpp
+++ b/be/src/vec/olap/vcollect_iterator.cpp
@@ -94,6 +94,7 @@ void VCollectIterator::init(TabletReader* reader, bool ori_data_overlapping, boo
 
 Status VCollectIterator::add_child(const RowSetSplits& rs_splits) {
     if (use_topn_next()) {
+        rs_splits.rs_reader->set_topn_limit(_topn_limit);
         _rs_splits.push_back(rs_splits);
         return Status::OK();
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to