github-actions[bot] commented on code in PR #64262:
URL: https://github.com/apache/doris/pull/64262#discussion_r3407652188
##########
be/src/storage/segment/segment_iterator.cpp:
##########
@@ -800,20 +805,35 @@ Status SegmentIterator::_get_row_ranges_by_keys() {
return Status::OK();
}
- // Read & seek key columns is a waste of time when no key column in _schema
- if (std::none_of(_schema->columns().begin(), _schema->columns().end(),
- [&](const TabletColumnPtr& col) {
- return col &&
-
_opts.tablet_schema->column_by_uid(col->unique_id()).is_key();
- })) {
- return Status::OK();
+ bool use_pk_index_for_row_store_only = false;
+ if (_is_row_store_only_segment()) {
+ const auto& tablet_schema = _segment->tablet_schema();
+ if (tablet_schema->keys_type() != UNIQUE_KEYS ||
+ _segment->get_primary_key_index() == nullptr) {
+ return Status::NotSupported(
+ "row_store_only key range pruning requires primary key
index, tablet_id={}, "
+ "rowset_id={}, segment_id={}",
+ _opts.tablet_id, _segment->rowset_id().to_string(),
_segment->id());
+ }
+ use_pk_index_for_row_store_only = true;
+ } else {
+ // Read & seek key columns is a waste of time when no key column in
_schema
+ if (std::none_of(_schema->columns().begin(), _schema->columns().end(),
+ [&](const TabletColumnPtr& col) {
+ return col &&
+
_opts.tablet_schema->column_by_uid(col->unique_id()).is_key();
+ })) {
+ return Status::OK();
+ }
}
RowRanges result_ranges;
for (auto& key_range : _opts.key_ranges) {
rowid_t lower_rowid = 0;
rowid_t upper_rowid = num_rows();
- RETURN_IF_ERROR(_prepare_seek(key_range));
+ if (!use_pk_index_for_row_store_only) {
+ RETURN_IF_ERROR(_prepare_seek(key_range));
+ }
if (key_range.upper_key != nullptr) {
// If client want to read upper_bound, the include_upper is true.
So we
// should get the first ordinal at which key is larger than
upper_bound.
Review Comment:
For row-store-only segments this skips
`_get_row_ranges_by_column_conditions()`, but `_lazy_init()` still calls
`_get_row_ranges_from_conditions()` a few lines later whenever
`_opts.col_id_to_predicates` is non-empty. That later helper calls
`_column_iterators[cid]->get_row_ranges_by_dict/bloom_filter/zone_map`, while
the row-store-only `_init_return_column_iterators()` only creates iterators for
rowid/virtual columns and the hidden row-store column, not ordinary logical
columns. A query like `SELECT * FROM t WHERE v1 = 10` will therefore
dereference a null iterator before the JSONB predicate path gets a chance to
evaluate the filter. Please also bypass `_get_row_ranges_from_conditions()` for
row-store-only scans (or make it skip non-persisted logical columns) and add a
non-key predicate regression case.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]