yx-keith commented on code in PR #64377:
URL: https://github.com/apache/doris/pull/64377#discussion_r3396615199
##########
be/src/storage/segment/segment_iterator.cpp:
##########
@@ -1140,6 +1155,69 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
return Status::OK();
}
+bool SegmentIterator::_enable_ann_topn_predicate_prefilter() const {
+ return !_opts.runtime_state ||
+
!_opts.runtime_state->query_options().__isset.enable_ann_topn_predicate_prefilter
||
+
_opts.runtime_state->query_options().enable_ann_topn_predicate_prefilter;
+}
+
+Status SegmentIterator::_eager_filter_predicates_into_bitmap() {
+ // The residual column predicates (those not resolvable by
zonemap/inverted/bitmap index, hence
+ // not yet reflected in _row_bitmap) are evaluated here over the candidate
rows, and the
+ // survivors are intersected back into _row_bitmap. The narrowed bitmap is
then fed to the ANN
+ // index as an IDSelector (see
AnnTopNRuntime::evaluate_vector_ann_search), so a predicated
+ // TopN query keeps using the index instead of degrading to an O(N)
brute-force distance scan.
+ //
+ // This runs before the main scan loop sets up _range_iter / _block_rowids
/
+ // _current_return_columns (see _lazy_init), so it allocates the predicate
columns and drives a
+ // local pass with the same _read_columns_by_index + predicate-evaluation
primitives the main
+ // loop uses. All of those members are re-initialized by the main loop
afterwards (_range_iter
+ // is recreated over the narrowed _row_bitmap right after
_apply_ann_topn_predicate returns).
+ if (!_is_need_vec_eval && !_is_need_short_eval) {
+ // has_column_predicate was true but every predicate was already
resolved via index;
+ // there is nothing left to evaluate per row.
+ return Status::OK();
+ }
+
+ _vec_init_char_column_id();
+ _current_return_columns.resize(_schema->columns().size());
+ for (auto cid : _predicate_column_ids) {
+ auto storage_column_type = _storage_name_and_type[cid].second;
+ RETURN_IF_CATCH_EXCEPTION(
+ _current_return_columns[cid] =
Schema::get_predicate_column_ptr(
+ _is_char_type[cid] ? FieldType::OLAP_FIELD_TYPE_CHAR
+ :
storage_column_type->get_storage_field_type(),
+ storage_column_type->is_nullable(),
_opts.io_ctx.reader_type));
+ _current_return_columns[cid]->set_rowset_segment_id(
+ {_segment->rowset_id(), _segment->id()});
+ }
+
+ if (_block_rowids.size() < _opts.block_row_max) {
+ _block_rowids.resize(_opts.block_row_max);
+ }
+
+ _range_iter.reset(new BitmapRangeIterator(_row_bitmap));
+ roaring::Roaring survivors;
+ while (true) {
Review Comment:
I have resolved this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]