This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 7a04785b905 [opt](inverted index) topn opt reads only limit number of records (#33665)
7a04785b905 is described below
commit 7a04785b905a2cbf60e7b8c378c7ab03b81da6c8
Author: zzzxl <[email protected]>
AuthorDate: Wed Apr 17 11:33:30 2024 +0800
[opt](inverted index) topn opt reads only limit number of records (#33665)
---
be/src/olap/iterators.h | 1 +
be/src/olap/rowset/beta_rowset_reader.cpp | 1 +
be/src/olap/rowset/beta_rowset_reader.h | 3 +++
be/src/olap/rowset/rowset_reader.h | 2 ++
be/src/olap/rowset/rowset_reader_context.h | 1 +
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 19 +++++++++++++++++++
be/src/olap/rowset/segment_v2/segment_iterator.h | 2 ++
be/src/vec/olap/vcollect_iterator.cpp | 1 +
8 files changed, 30 insertions(+)
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index e4b62d157d1..95e905185ac 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -119,6 +119,7 @@ public:
// slots that cast may be eliminated in storage layer
std::map<std::string, PrimitiveType> target_cast_type_for_variants;
RowRanges row_ranges;
+ size_t topn_limit = 0;
};
class RowwiseIterator;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index c19c8ece353..80c3085ac7a 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -101,6 +101,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
_read_options.rowset_id = _rowset->rowset_id();
_read_options.version = _rowset->version();
_read_options.tablet_id = _rowset->rowset_meta()->tablet_id();
+ _read_options.topn_limit = _topn_limit;
if (_read_context->lower_bound_keys != nullptr) {
for (int i = 0; i < _read_context->lower_bound_keys->size(); ++i) {
_read_options.key_ranges.emplace_back(&_read_context->lower_bound_keys->at(i),
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index fd275988164..ee23b2d9b9f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -84,6 +84,8 @@ public:
RowsetReaderSharedPtr clone() override;
+ void set_topn_limit(size_t topn_limit) override { _topn_limit = topn_limit; }
+
private:
[[nodiscard]] Status _init_iterator_once();
[[nodiscard]] Status _init_iterator();
@@ -123,6 +125,7 @@ private:
StorageReadOptions _read_options;
bool _empty = false;
+ size_t _topn_limit = 0;
};
} // namespace doris
diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h
index 45449952431..9ada1706d89 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -88,6 +88,8 @@ public:
virtual bool update_profile(RuntimeProfile* profile) = 0;
virtual RowsetReaderSharedPtr clone() = 0;
+
+ virtual void set_topn_limit(size_t topn_limit) = 0;
};
} // namespace doris
diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h
index d5683924a9e..6029196c9bb 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -83,6 +83,7 @@ struct RowsetReaderContext {
RowsetId rowset_id;
// slots that cast may be eliminated in storage layer
std::map<std::string, PrimitiveType> target_cast_type_for_variants;
+ size_t topn_limit = 0;
};
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e69d707499e..bee83282c48 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -2218,6 +2218,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
_current_batch_rows_read = 0;
uint32_t nrows_read_limit = _opts.block_row_max;
+ if (_can_opt_topn_reads()) {
+ nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit);
+ }
RETURN_IF_ERROR(_read_columns_by_index(
nrows_read_limit, _current_batch_rows_read,
_lazy_materialization_read || _opts.record_rowids ||
_is_need_expr_eval));
@@ -2677,5 +2680,21 @@ bool SegmentIterator::_has_delete_predicate(ColumnId cid) {
return delete_columns_set.contains(cid);
}
+bool SegmentIterator::_can_opt_topn_reads() const {
+ if (_opts.topn_limit <= 0) {
+ return false;
+ }
+
+ if (_opts.delete_condition_predicates->num_of_column_predicate() > 0) {
+ return false;
+ }
+
+ if (!_col_predicates.empty() || !_col_preds_except_leafnode_of_andnode.empty()) {
+ return false;
+ }
+
+ return true;
+}
+
} // namespace segment_v2
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 1bf7cf22c3d..84c10f3b8b2 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -380,6 +380,8 @@ private:
bool _has_delete_predicate(ColumnId cid);
+ bool _can_opt_topn_reads() const;
+
class BitmapRangeIterator;
class BackwardBitmapRangeIterator;
diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp
index 90a38eef51e..0a6bafca49c 100644
--- a/be/src/vec/olap/vcollect_iterator.cpp
+++ b/be/src/vec/olap/vcollect_iterator.cpp
@@ -95,6 +95,7 @@ void VCollectIterator::init(TabletReader* reader, bool ori_data_overlapping, boo
Status VCollectIterator::add_child(const RowSetSplits& rs_splits) {
if (use_topn_next()) {
+ rs_splits.rs_reader->set_topn_limit(_topn_limit);
_rs_splits.push_back(rs_splits);
return Status::OK();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]