[GitHub] [doris] yiguolei commented on a diff in pull request #15917: [improvement](scan) Support pushdown execute expr ctx

via GitHub Sun, 05 Mar 2023 23:06:20 -0800


yiguolei commented on code in PR #15917:
URL: https://github.com/apache/doris/pull/15917#discussion_r1126011291



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1196,58 +1220,91 @@ void SegmentIterator::_vec_init_lazy_materialization() {
         _is_need_short_eval = true;
     }
 
-    // Step 2: check non-predicate read costs to determine whether need lazy 
materialization
-    // fill _non_predicate_columns.
-    // After some optimization, we suppose lazy materialization is better 
performance.
+    // make _schema_block_id_map
+    _schema_block_id_map.resize(_schema.columns().size());
+    for (int i = 0; i < _schema.num_column_ids(); i++) {
+        auto cid = _schema.column_id(i);
+        _schema_block_id_map[cid] = i;
+    }
+
+    // Step2: extract columns that can execute expr context
+    _is_common_expr_column.resize(_schema.columns().size(), false);
+    if (_enable_common_expr_pushdown && _remaining_vconjunct_root != nullptr) {
+        _extract_common_expr_columns(_remaining_vconjunct_root);
+        if (!_common_expr_columns.empty()) {
+            _is_need_expr_eval = true;
+            for (auto cid : _schema.column_ids()) {
+                // pred column also needs to be filtered by expr
+                if (_is_common_expr_column[cid] || _is_pred_column[cid]) {
+                    auto loc = _schema_block_id_map[cid];
+                    _columns_to_filter.push_back(loc);
+                }
+            }
+        }
+    }
+
+    // Step 3: fill non predicate columns and second read column
+    // if _schema columns size equal to pred_column_ids size, 
lazy_materialization_read is false,
+    // all columns are lazy materialization columns without non predicate column.
+    // If common expr pushdown exists, and expr column is not contained in 
lazy materialization columns,
+    // add to second read column, which will be read after lazy materialization
     if (_schema.column_ids().size() > pred_column_ids.size()) {
         for (auto cid : _schema.column_ids()) {
             if (!_is_pred_column[cid]) {
-                _non_predicate_columns.push_back(cid);
                 if (_is_need_vec_eval || _is_need_short_eval) {
                     _lazy_materialization_read = true;
                 }
+                if (!_is_common_expr_column[cid]) {
+                    _non_predicate_columns.push_back(cid);
+                } else {
+                    _second_read_column_ids.push_back(cid);
+                }
             }
         }
     }
 
-    // Step 3: fill column ids for read and output
+    // Step 4: fill first read columns
     if (_lazy_materialization_read) {
         // insert pred cid to first_read_columns
         for (auto cid : pred_column_ids) {
             _first_read_column_ids.push_back(cid);
         }
-    } else if (!_is_need_vec_eval &&
-               !_is_need_short_eval) { // no pred exists, just read and output 
column
+    } else if (!_is_need_vec_eval && !_is_need_short_eval &&
+               !_is_need_expr_eval) { // no pred exists, just read and output 
column
         for (int i = 0; i < _schema.num_column_ids(); i++) {
             auto cid = _schema.column_id(i);
             _first_read_column_ids.push_back(cid);
         }
-    } else { // pred exits, but we can eliminate lazy materialization
-        // insert pred/non-pred cid to first read columns
-        std::set<ColumnId> pred_id_set;
-        pred_id_set.insert(_short_cir_pred_column_ids.begin(), 
_short_cir_pred_column_ids.end());
-        pred_id_set.insert(_vec_pred_column_ids.begin(), 
_vec_pred_column_ids.end());
-        std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(),
-                                        _non_predicate_columns.end());
-
-        for (int i = 0; i < _schema.num_column_ids(); i++) {
-            auto cid = _schema.column_id(i);
-            if (pred_id_set.find(cid) != pred_id_set.end()) {
-                _first_read_column_ids.push_back(cid);
-            } else if (non_pred_set.find(cid) != non_pred_set.end()) {
+    } else {
+        if (_is_need_vec_eval || _is_need_short_eval) {
+            // TODO To refactor, because we suppose lazy materialization is 
better performance.
+            // pred exits, but we can eliminate lazy materialization
+            // insert pred/non-pred cid to first read columns
+            std::set<ColumnId> pred_id_set;
+            pred_id_set.insert(_short_cir_pred_column_ids.begin(),
+                               _short_cir_pred_column_ids.end());
+            pred_id_set.insert(_vec_pred_column_ids.begin(), 
_vec_pred_column_ids.end());
+            std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(),
+                                            _non_predicate_columns.end());
+
+            // _second_read_column_ids must be empty. Otherwise _lazy_materialization_read must not be false.

Review Comment:
   Add a CHECK(_second_read_column_ids.empty()) here, so that the invariant stated in the comment above is actually enforced.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [doris] yiguolei commented on a diff in pull request #15917: [improvement](scan) Support pushdown execute expr ctx

Reply via email to