yiguolei commented on code in PR #15917:
URL: https://github.com/apache/doris/pull/15917#discussion_r1126011291
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1196,58 +1220,91 @@ void SegmentIterator::_vec_init_lazy_materialization() {
_is_need_short_eval = true;
}
- // Step 2: check non-predicate read costs to determine whether need lazy
materialization
- // fill _non_predicate_columns.
- // After some optimization, we suppose lazy materialization is better
performance.
+ // make _schema_block_id_map
+ _schema_block_id_map.resize(_schema.columns().size());
+ for (int i = 0; i < _schema.num_column_ids(); i++) {
+ auto cid = _schema.column_id(i);
+ _schema_block_id_map[cid] = i;
+ }
+
+ // Step2: extract columns that can execute expr context
+ _is_common_expr_column.resize(_schema.columns().size(), false);
+ if (_enable_common_expr_pushdown && _remaining_vconjunct_root != nullptr) {
+ _extract_common_expr_columns(_remaining_vconjunct_root);
+ if (!_common_expr_columns.empty()) {
+ _is_need_expr_eval = true;
+ for (auto cid : _schema.column_ids()) {
+ // pred column also needs to be filtered by expr
+ if (_is_common_expr_column[cid] || _is_pred_column[cid]) {
+ auto loc = _schema_block_id_map[cid];
+ _columns_to_filter.push_back(loc);
+ }
+ }
+ }
+ }
+
+ // Step 3: fill non predicate columns and second read column
+ // if _schema columns size equal to pred_column_ids size,
lazy_materialization_read is false,
+ // all columns are lazy materialization columns without non predicate
column.
+ // If common expr pushdown exists, and expr column is not contained in
lazy materialization columns,
+ // add to second read column, which will be read after lazy materialization
if (_schema.column_ids().size() > pred_column_ids.size()) {
for (auto cid : _schema.column_ids()) {
if (!_is_pred_column[cid]) {
- _non_predicate_columns.push_back(cid);
if (_is_need_vec_eval || _is_need_short_eval) {
_lazy_materialization_read = true;
}
+ if (!_is_common_expr_column[cid]) {
+ _non_predicate_columns.push_back(cid);
+ } else {
+ _second_read_column_ids.push_back(cid);
+ }
}
}
}
- // Step 3: fill column ids for read and output
+ // Step 4: fill first read columns
if (_lazy_materialization_read) {
// insert pred cid to first_read_columns
for (auto cid : pred_column_ids) {
_first_read_column_ids.push_back(cid);
}
- } else if (!_is_need_vec_eval &&
- !_is_need_short_eval) { // no pred exists, just read and output
column
+ } else if (!_is_need_vec_eval && !_is_need_short_eval &&
+ !_is_need_expr_eval) { // no pred exists, just read and output
column
for (int i = 0; i < _schema.num_column_ids(); i++) {
auto cid = _schema.column_id(i);
_first_read_column_ids.push_back(cid);
}
- } else { // pred exits, but we can eliminate lazy materialization
- // insert pred/non-pred cid to first read columns
- std::set<ColumnId> pred_id_set;
- pred_id_set.insert(_short_cir_pred_column_ids.begin(),
_short_cir_pred_column_ids.end());
- pred_id_set.insert(_vec_pred_column_ids.begin(),
_vec_pred_column_ids.end());
- std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(),
- _non_predicate_columns.end());
-
- for (int i = 0; i < _schema.num_column_ids(); i++) {
- auto cid = _schema.column_id(i);
- if (pred_id_set.find(cid) != pred_id_set.end()) {
- _first_read_column_ids.push_back(cid);
- } else if (non_pred_set.find(cid) != non_pred_set.end()) {
+ } else {
+ if (_is_need_vec_eval || _is_need_short_eval) {
+ // TODO To refactor, because we suppose lazy materialization is
better performance.
+ // pred exits, but we can eliminate lazy materialization
+ // insert pred/non-pred cid to first read columns
+ std::set<ColumnId> pred_id_set;
+ pred_id_set.insert(_short_cir_pred_column_ids.begin(),
+ _short_cir_pred_column_ids.end());
+ pred_id_set.insert(_vec_pred_column_ids.begin(),
_vec_pred_column_ids.end());
+ std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(),
+ _non_predicate_columns.end());
+
+ // _second_read_column_ids must be empty. Otherwise
_lazy_materialization_read must not be false.
Review Comment:
Add CHECK(_second_read_column_ids.empty()) here: per the code comment just above, _second_read_column_ids must be empty on this path.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]