This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 4b256117dd3 [fix](expr) Re fix BE core dump while common expr filter
delete condition column #29328 (#29329)
4b256117dd3 is described below
commit 4b256117dd34450bf736924f707c55aeac8a262b
Author: Xinyi Zou <[email protected]>
AuthorDate: Fri Dec 29 23:24:46 2023 +0800
[fix](expr) Re fix BE core dump while common expr filter delete condition
column #29328 (#29329)
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 52 +++++++++-------------
1 file changed, 20 insertions(+), 32 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index d41b18609a4..0a786d7a545 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1454,8 +1454,6 @@ Status SegmentIterator::_seek_columns(const
std::vector<ColumnId>& column_ids, r
// todo(wb) need a UT here
Status SegmentIterator::_vec_init_lazy_materialization() {
_is_pred_column.resize(_schema->columns().size(), false);
- std::vector<bool> is_pred_column_no_del_condition;
- is_pred_column_no_del_condition.resize(_schema->columns().size(), false);
// including short/vec/delete pred
std::set<ColumnId> pred_column_ids;
@@ -1497,7 +1495,6 @@ Status SegmentIterator::_vec_init_lazy_materialization() {
for (auto predicate : _col_predicates) {
auto cid = predicate->column_id();
_is_pred_column[cid] = true;
- is_pred_column_no_del_condition[cid] = true;
pred_column_ids.insert(cid);
// check pred using short eval or vec eval
@@ -1551,16 +1548,10 @@ Status
SegmentIterator::_vec_init_lazy_materialization() {
if (!_common_expr_columns.empty()) {
_is_need_expr_eval = true;
for (auto cid : _schema->column_ids()) {
- // pred column also needs to be filtered by expr, exclude
delete condition column,
- // Delete condition column not need to be filtered, query
engine does not need it,
- // after _output_column_by_sel_idx, delete condition
materialize column will be erase
- // at the end of the block.
- // Eg:
- // `delete from table where a = 10;`
- // `select b from table;`
- // a column only effective in segment iterator, the block from
query engine only contain the b column,
- // so no need to filter a column by expr.
- if (_is_common_expr_column[cid] ||
is_pred_column_no_del_condition[cid]) {
+ // pred column also needs to be filtered by expr, exclude
additional delete condition column.
+ // if delete condition column not in the block, no filter is
needed
+ // and will be removed from _columns_to_filter in the first
next_batch.
+ if (_is_common_expr_column[cid] || _is_pred_column[cid]) {
auto loc = _schema_block_id_map[cid];
_columns_to_filter.push_back(loc);
}
@@ -2037,6 +2028,22 @@ Status
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
_current_return_columns[cid]->reserve(_opts.block_row_max);
}
}
+
+ // Additional deleted filter condition will be materialized column be
at the end of the block,
+ // after _output_column_by_sel_idx will be erase, we not need to
filter it,
+ // so erase it from _columns_to_filter in the first next_batch.
+ // Eg:
+ // `delete from table where a = 10;`
+ // `select b from table;`
+ // a column only effective in segment iterator, the block from query
engine only contain the b column,
+ // so no need to filter a column by expr.
+ for (auto it = _columns_to_filter.begin(); it !=
_columns_to_filter.end();) {
+ if (*it >= block->columns()) {
+ it = _columns_to_filter.erase(it);
+ } else {
+ ++it;
+ }
+ }
}
_init_current_block(block, _current_return_columns);
@@ -2051,25 +2058,6 @@ Status
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
_replace_version_col(_current_batch_rows_read);
}
- // If col >= block->columns(), it means col should not be filtered, there
is a BUG.
- // such as delete condition column was incorrectly put into
columns_to_filter,
- // which is usually at the end of the block. only check during the first
next_batch.
- if (_opts.stats->blocks_load == 0) {
- for (const auto& col : _columns_to_filter) {
- if (col >= block->columns()) {
- std::ostringstream ss;
- for (const auto& i : _columns_to_filter) {
- ss << i << "-";
- }
- throw Exception(
- ErrorCode::INTERNAL_ERROR,
- "filter block column id(index) greater than
block->columns(), "
- "column id={}, all columns that need filter={}, block
columns num={}",
- col, ss.str().substr(0, ss.str().length() - 1),
block->columns());
- }
- }
- }
-
_opts.stats->blocks_load += 1;
_opts.stats->raw_rows_read += _current_batch_rows_read;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]