This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 1c85db2c215 [opt](inverted index) topn opt reads only limit number of
records #33163 (#33602)
1c85db2c215 is described below
commit 1c85db2c2151401fea4e6d8f6f323be3aed36804
Author: zzzxl <[email protected]>
AuthorDate: Thu Apr 18 08:56:07 2024 +0800
[opt](inverted index) topn opt reads only limit number of records #33163
(#33602)
---
be/src/olap/iterators.h | 1 +
be/src/olap/rowset/beta_rowset_reader.cpp | 1 +
be/src/olap/rowset/beta_rowset_reader.h | 3 +++
be/src/olap/rowset/rowset_reader.h | 2 ++
be/src/olap/rowset/rowset_reader_context.h | 1 +
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 19 +++++++++++++++++++
be/src/olap/rowset/segment_v2/segment_iterator.h | 2 ++
be/src/vec/olap/vcollect_iterator.cpp | 1 +
8 files changed, 30 insertions(+)
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index 3b9d205e83d..68ebc98570c 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -110,6 +110,7 @@ public:
RowsetId rowset_id;
Version version;
int32_t tablet_id = 0;
+ size_t topn_limit = 0;
};
class RowwiseIterator;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp
b/be/src/olap/rowset/beta_rowset_reader.cpp
index bb11347990c..1394e3a006f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -101,6 +101,7 @@ Status
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
_read_options.rowset_id = _rowset->rowset_id();
_read_options.version = _rowset->version();
_read_options.tablet_id = _rowset->rowset_meta()->tablet_id();
+ _read_options.topn_limit = _topn_limit;
if (_read_context->lower_bound_keys != nullptr) {
for (int i = 0; i < _read_context->lower_bound_keys->size(); ++i) {
_read_options.key_ranges.emplace_back(&_read_context->lower_bound_keys->at(i),
diff --git a/be/src/olap/rowset/beta_rowset_reader.h
b/be/src/olap/rowset/beta_rowset_reader.h
index 2a9a5cf8072..db0b1facc73 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -84,6 +84,8 @@ public:
RowsetReaderSharedPtr clone() override;
+ void set_topn_limit(size_t topn_limit) override { _topn_limit =
topn_limit; } // Stored value is later copied into _read_options.topn_limit by get_segment_iterators.
+
private:
Status _init_iterator_once();
Status _init_iterator();
@@ -122,6 +124,7 @@ private:
StorageReadOptions _read_options;
bool _empty = false;
+ size_t _topn_limit = 0;
};
} // namespace doris
diff --git a/be/src/olap/rowset/rowset_reader.h
b/be/src/olap/rowset/rowset_reader.h
index 4b3791da917..34caa615c25 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -88,6 +88,8 @@ public:
virtual bool update_profile(RuntimeProfile* profile) = 0;
virtual RowsetReaderSharedPtr clone() = 0;
+
+ virtual void set_topn_limit(size_t topn_limit) = 0;
};
} // namespace doris
diff --git a/be/src/olap/rowset/rowset_reader_context.h
b/be/src/olap/rowset/rowset_reader_context.h
index d1b6253b13a..2b75de869d7 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -80,6 +80,7 @@ struct RowsetReaderContext {
bool is_key_column_group = false;
const std::set<int32_t>* output_columns = nullptr;
RowsetId rowset_id;
+ size_t topn_limit = 0;
};
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 28311f316a1..cbdd6ae2631 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -2070,6 +2070,9 @@ Status
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
_current_batch_rows_read = 0;
uint32_t nrows_read_limit = _opts.block_row_max;
+ if (_can_opt_topn_reads()) {
+ nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit),
nrows_read_limit);
+ }
RETURN_IF_ERROR(_read_columns_by_index(
nrows_read_limit, _current_batch_rows_read,
_lazy_materialization_read || _opts.record_rowids ||
_is_need_expr_eval));
@@ -2511,5 +2514,21 @@ bool SegmentIterator::_has_delete_predicate(ColumnId
cid) {
return delete_columns_set.contains(cid);
}
+bool SegmentIterator::_can_opt_topn_reads() const { // True only if every check below passes; _next_batch_internal then caps nrows_read_limit at _opts.topn_limit.
+ if (_opts.topn_limit <= 0) { // topn_limit is size_t, so this only matches 0 (its default value)
+ return false;
+ }
+ // Any delete-condition column predicate disables the optimization.
+ if (_opts.delete_condition_predicates->num_of_column_predicate() > 0) {
+ return false;
+ }
+ // Likewise for any entry in either column-predicate container.
+ if (!_col_predicates.empty() ||
!_col_preds_except_leafnode_of_andnode.empty()) {
+ return false;
+ }
+ // NOTE(review): presumably with no predicates left to drop rows, reading topn_limit rows per batch suffices -- confirm.
+ return true;
+}
+
} // namespace segment_v2
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 5d32367f94d..925f669e424 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -327,6 +327,8 @@ private:
bool _has_delete_predicate(ColumnId cid);
+ bool _can_opt_topn_reads() const;
+
class BitmapRangeIterator;
class BackwardBitmapRangeIterator;
diff --git a/be/src/vec/olap/vcollect_iterator.cpp
b/be/src/vec/olap/vcollect_iterator.cpp
index 6112f30a47f..cf71826b40e 100644
--- a/be/src/vec/olap/vcollect_iterator.cpp
+++ b/be/src/vec/olap/vcollect_iterator.cpp
@@ -94,6 +94,7 @@ void VCollectIterator::init(TabletReader* reader, bool
ori_data_overlapping, boo
Status VCollectIterator::add_child(const RowSetSplits& rs_splits) {
if (use_topn_next()) {
+ rs_splits.rs_reader->set_topn_limit(_topn_limit);
_rs_splits.push_back(rs_splits);
return Status::OK();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]