github-actions[bot] commented on code in PR #24972:
URL: https://github.com/apache/doris/pull/24972#discussion_r1398627872


##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1946,6 +2014,200 @@ Status SegmentIterator::_read_columns_by_rowids(std::vector<ColumnId>& read_colu
     return Status::OK();
 }
 
+Status SegmentIterator::_process_late_arrival_predicates(

Review Comment:
   warning: function '_process_late_arrival_predicates' exceeds recommended 
size/complexity thresholds [readability-function-size]
   ```cpp
   Status SegmentIterator::_process_late_arrival_predicates(
                           ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2016:** 96 lines 
including whitespace and comments (threshold 80)
   ```cpp
   Status SegmentIterator::_process_late_arrival_predicates(
                           ^
   ```
   
   </details>
   



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1946,6 +2014,200 @@
     return Status::OK();
 }
 
+Status SegmentIterator::_process_late_arrival_predicates(
+        const std::vector<ColumnPredicate*>& predicates) {
+    std::set<ColumnId> columns_id;
+    std::set<ColumnId> vec_pred_col_id_set;
+    std::set<ColumnId> short_cir_pred_col_id_set;
+
+    bool has_vec_predicates = false;
+    bool has_short_predicates = false;
+
+    for (auto* predicate : predicates) {
+        const auto cid = predicate->column_id();
+        columns_id.emplace(cid);
+        if (_can_evaluated_by_vectorized(predicate)) {
+            vec_pred_col_id_set.emplace(cid);
+            _pre_eval_block_predicate.emplace_back(predicate);
+            has_vec_predicates = true;
+        } else {
+            short_cir_pred_col_id_set.emplace(cid);
+            _short_cir_eval_predicate.emplace_back(predicate);
+            has_short_predicates = true;
+        }
+
+        DCHECK_LT(cid, _is_pred_column.size());
+        _is_pred_column[cid] = true;
+    }
+
+    DCHECK(!_first_read_column_ids.empty());
+
+    /// Here remove all non-predicates(common exprs included) from `_first_read_column_ids`
+    if (_lazy_materialization_read) {
+        DCHECK(!_non_predicate_columns.empty() || !_second_read_column_ids.empty());
+        /// If there are no vec-predicates and short-predicates,
+        /// the columns in `_first_read_column_ids` are all common exprs'.
+        if (!(_is_need_vec_eval || _is_need_short_eval) && _is_need_expr_eval) {
+            DCHECK(_second_read_column_ids.empty());
+            _second_read_column_ids.assign(_first_read_column_ids.cbegin(),
+                                           _first_read_column_ids.cend());
+            _first_read_column_ids.clear();
+        }
+    } else {
+        DCHECK(_non_predicate_columns.empty());
+        DCHECK(_second_read_column_ids.empty());
+        if (_is_need_expr_eval) {
+            /// all common exprs' columns are in `_first_read_column_ids`
+            DCHECK(!(_is_need_vec_eval || _is_need_short_eval));
+            DCHECK(_second_read_column_ids.empty());
+            _second_read_column_ids.assign(_first_read_column_ids.cbegin(),
+                                           _first_read_column_ids.cend());
+            _first_read_column_ids.clear();
+
+        } else if (!(_is_need_vec_eval || _is_need_short_eval)) {
+            /// all non-predicate columns are in `_first_read_column_ids`
+            _non_predicate_columns.assign(_first_read_column_ids.cbegin(),
+                                          _first_read_column_ids.cend());
+            _first_read_column_ids.clear();
+        }
+        _lazy_materialization_read = true;
+    }
+
+    /// Now remove late arrival predicates' columns from `_second_read_column_ids` or `_non_predicate_columns`,
+    /// and put them into `_first_read_column_ids`.
+    for (auto cid : columns_id) {
+        if (std::find(_first_read_column_ids.begin(), _first_read_column_ids.end(), cid) !=
+            _first_read_column_ids.end()) {
+            DCHECK(_is_pred_column[cid]);
+            continue;
+        }
+
+        auto it = std::find(_second_read_column_ids.begin(), _second_read_column_ids.end(), cid);
+        if (it != _second_read_column_ids.end()) {
+            _second_read_column_ids.erase(it);
+        } else {
+            it = std::find(_non_predicate_columns.begin(), _non_predicate_columns.end(), cid);
+            if (it != _non_predicate_columns.end()) {
+                _non_predicate_columns.erase(it);
+            }
+        }
+
+        _first_read_column_ids.emplace_back(cid);
+        _is_pred_column[cid] = true;
+        const auto* column_desc = _schema->column(cid);
+        RETURN_IF_CATCH_EXCEPTION(_current_return_columns[cid] = Schema::get_predicate_column_ptr(
+                                          *column_desc, _opts.io_ctx.reader_type));
+        _current_return_columns[cid]->set_rowset_segment_id(
+                {_segment->rowset_id(), _segment->id()});
+        _current_return_columns[cid]->reserve(_opts.block_row_max);
+    }
+
+    if (has_vec_predicates) {
+        _is_need_vec_eval = true;
+    }
+    if (has_short_predicates) {
+        _is_need_short_eval = true;
+    }
+    DCHECK(_is_need_short_eval || _is_need_vec_eval);
+
+    return Status::OK();
+}
+
+Status SegmentIterator::_handle_late_arrival_predicates() {

Review Comment:
   warning: function '_handle_late_arrival_predicates' exceeds recommended 
size/complexity thresholds [readability-function-size]
   ```cpp
   Status SegmentIterator::_handle_late_arrival_predicates() {
                           ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/segment_iterator.cpp:2115:** 93 lines 
including whitespace and comments (threshold 80)
   ```cpp
   Status SegmentIterator::_handle_late_arrival_predicates() {
                           ^
   ```
   
   </details>
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to