github-actions[bot] commented on code in PR #24972:
URL: https://github.com/apache/doris/pull/24972#discussion_r1398627872
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1946,6 +2014,200 @@ Status
SegmentIterator::_read_columns_by_rowids(std::vector<ColumnId>& read_colu
return Status::OK();
}
+Status SegmentIterator::_process_late_arrival_predicates(
Review Comment:
warning: function '_process_late_arrival_predicates' exceeds recommended
size/complexity thresholds [readability-function-size]
```cpp
Status SegmentIterator::_process_late_arrival_predicates(
^
```
<details>
<summary>Additional context</summary>
**be/src/olap/rowset/segment_v2/segment_iterator.cpp:2016:** 96 lines
including whitespace and comments (threshold 80)
```cpp
Status SegmentIterator::_process_late_arrival_predicates(
^
```
</details>
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1946,6 +2014,200 @@
return Status::OK();
}
/// Registers `predicates` for evaluation and makes sure every predicate column ends up in
/// `_first_read_column_ids`. Steps, in the order the body performs them:
///   1. append each predicate to `_pre_eval_block_predicate` or `_short_cir_eval_predicate`
///      and mark its column in `_is_pred_column`;
///   2. regroup `_first_read_column_ids` / `_second_read_column_ids` / `_non_predicate_columns`
///      based on the eval flags as they were before this call;
///   3. move each predicate column into `_first_read_column_ids` and allocate its column object;
///   4. raise `_is_need_vec_eval` / `_is_need_short_eval`.
/// Returns Status::OK() when the end of the body is reached. RETURN_IF_CATCH_EXCEPTION presumably
/// returns an error status early if the allocation throws (macro not visible here; confirm).
+Status SegmentIterator::_process_late_arrival_predicates(
+ const std::vector<ColumnPredicate*>& predicates) {
+ std::set<ColumnId> columns_id; // distinct column ids referenced by `predicates`
+ std::set<ColumnId> vec_pred_col_id_set; // NOTE(review): only written below, never read in this function
+ std::set<ColumnId> short_cir_pred_col_id_set; // NOTE(review): only written below, never read in this function
+
+ bool has_vec_predicates = false;
+ bool has_short_predicates = false;
+
+ for (auto* predicate : predicates) {
+ const auto cid = predicate->column_id();
+ columns_id.emplace(cid);
+ if (_can_evaluated_by_vectorized(predicate)) { // goes to the pre-eval list, otherwise to the short-circuit list
+ vec_pred_col_id_set.emplace(cid);
+ _pre_eval_block_predicate.emplace_back(predicate);
+ has_vec_predicates = true;
+ } else {
+ short_cir_pred_col_id_set.emplace(cid);
+ _short_cir_eval_predicate.emplace_back(predicate);
+ has_short_predicates = true;
+ }
+
+ DCHECK_LT(cid, _is_pred_column.size()); // `_is_pred_column` is indexed by column id
+ _is_pred_column[cid] = true;
+ }
+
+ DCHECK(!_first_read_column_ids.empty());
+
+ /// Here remove all non-predicates(common exprs included) from
`_first_read_column_ids`
+ if (_lazy_materialization_read) { // `_is_need_*_eval` below still hold their values from before this call
+ DCHECK(!_non_predicate_columns.empty() ||
!_second_read_column_ids.empty());
+ /// If there are no vec-predicates and short-predicates,
+ /// the columns in `_first_read_column_ids` are all common exprs'.
+ if (!(_is_need_vec_eval || _is_need_short_eval) && _is_need_expr_eval)
{
+ DCHECK(_second_read_column_ids.empty());
+ _second_read_column_ids.assign(_first_read_column_ids.cbegin(),
+ _first_read_column_ids.cend());
+ _first_read_column_ids.clear(); // first-read list is refilled by the loop further down
+ }
+ } else {
+ DCHECK(_non_predicate_columns.empty());
+ DCHECK(_second_read_column_ids.empty());
+ if (_is_need_expr_eval) {
+ /// all common exprs' columns are in `_first_read_column_ids`
+ DCHECK(!(_is_need_vec_eval || _is_need_short_eval));
+ DCHECK(_second_read_column_ids.empty());
+ _second_read_column_ids.assign(_first_read_column_ids.cbegin(),
+ _first_read_column_ids.cend());
+ _first_read_column_ids.clear();
+
+ } else if (!(_is_need_vec_eval || _is_need_short_eval)) {
+ /// all non-predicate columns are in `_first_read_column_ids`
+ _non_predicate_columns.assign(_first_read_column_ids.cbegin(),
+ _first_read_column_ids.cend());
+ _first_read_column_ids.clear();
+ }
+ _lazy_materialization_read = true; // flag was false on entry to this branch; it is switched on here
+ }
+
+ /// Now remove late arrival predicates' columns from
`_second_read_column_ids` or `_non_predicate_columns`,
+ /// and put them into `_first_read_column_ids`.
+ for (auto cid : columns_id) {
+ if (std::find(_first_read_column_ids.begin(),
_first_read_column_ids.end(), cid) !=
+ _first_read_column_ids.end()) { // already a first-read column: nothing to move or allocate
+ DCHECK(_is_pred_column[cid]);
+ continue;
+ }
+
+ auto it = std::find(_second_read_column_ids.begin(),
_second_read_column_ids.end(), cid);
+ if (it != _second_read_column_ids.end()) {
+ _second_read_column_ids.erase(it);
+ } else {
+ it = std::find(_non_predicate_columns.begin(),
_non_predicate_columns.end(), cid);
+ if (it != _non_predicate_columns.end()) {
+ _non_predicate_columns.erase(it);
+ }
+ } // a cid found in neither list is still appended to the first-read list below
+
+ _first_read_column_ids.emplace_back(cid);
+ _is_pred_column[cid] = true; // same assignment was already made in the classification loop above
+ const auto* column_desc = _schema->column(cid);
+ RETURN_IF_CATCH_EXCEPTION(_current_return_columns[cid] =
Schema::get_predicate_column_ptr(
+ *column_desc,
_opts.io_ctx.reader_type)); // replaces the column object previously stored for `cid`
+ _current_return_columns[cid]->set_rowset_segment_id(
+ {_segment->rowset_id(), _segment->id()});
+ _current_return_columns[cid]->reserve(_opts.block_row_max);
+ }
+
+ if (has_vec_predicates) { // flags are only ever raised here, never cleared
+ _is_need_vec_eval = true;
+ }
+ if (has_short_predicates) {
+ _is_need_short_eval = true;
+ }
+ DCHECK(_is_need_short_eval || _is_need_vec_eval); // NOTE(review): would fire if `predicates` is empty and neither flag was already set
+
+ return Status::OK();
+}
+
+Status SegmentIterator::_handle_late_arrival_predicates() {
Review Comment:
warning: function '_handle_late_arrival_predicates' exceeds recommended
size/complexity thresholds [readability-function-size]
```cpp
Status SegmentIterator::_handle_late_arrival_predicates() {
^
```
<details>
<summary>Additional context</summary>
**be/src/olap/rowset/segment_v2/segment_iterator.cpp:2115:** 93 lines
including whitespace and comments (threshold 80)
```cpp
Status SegmentIterator::_handle_late_arrival_predicates() {
^
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]