This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e0cd8599d2 [fix](delete) fix delete from bug which can get wrong
result (#17146)
e0cd8599d2 is described below
commit e0cd8599d2c8ddfcb3162f77609892ef3fefd78c
Author: xueweizhang <[email protected]>
AuthorDate: Tue Feb 28 09:20:10 2023 +0800
[fix](delete) fix delete from bug which can get wrong result (#17146)
理论上,如果是两次独立的删除,比如delete from table where a=1; delete from table where
a=2;其实这个地方应该可以使用的,但是目前的代码,是把所有不同版本的delete predicates和不同列的delete
predicates都放到一起了,失去了版本信息、失去了谓词间可能是and的关系,统一弱化成了delete predicates都是独立的,有一个delete
predicates满足条件,就把page都去掉。
这个pr的修改方式,就是在当前代码的基础上,当只有一个delete predicate的时候才能保证后续淘汰page的正确性,所以这里一律加了 ==
1的判断才传递delete predicates。
如果要把不同版本的delete predicates和不同列的delete
predicates作为完整和严谨的逻辑去判断page,需要修改的设计就有点多了,目前的方案算是一种优先解决bug的思路,后续可以进一步把delete
predicates这块加速zone判断进行page淘汰的逻辑完善,提高delete predicates使用的场景。
---
be/src/olap/delete_handler.cpp | 35 ++-
be/src/olap/delete_handler.h | 2 +-
be/src/olap/iterators.h | 2 +-
be/src/olap/rowset/beta_rowset_reader.cpp | 4 +-
be/src/olap/rowset/segment_v2/segment.cpp | 2 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +-
.../data/delete_p0/test_zone_map_delete.out | 286 +++++++++++++++++++++
.../suites/delete_p0/test_zone_map_delete.groovy | 97 +++++++
8 files changed, 405 insertions(+), 27 deletions(-)
diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index 0b755ae890..313d48dfda 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -311,8 +311,8 @@ void DeleteHandler::finalize() {
void DeleteHandler::get_delete_conditions_after_version(
int64_t version, AndBlockColumnPredicate*
and_block_column_predicate_ptr,
- std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>*
col_id_to_del_predicates)
- const {
+ std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>*
+ del_predicates_for_zone_map) const {
for (auto& del_cond : _del_conds) {
if (del_cond.filter_version > version) {
// now, only query support delete column predicate operator
@@ -322,33 +322,28 @@ void DeleteHandler::get_delete_conditions_after_version(
new
SingleColumnBlockPredicate(del_cond.column_predicate_vec[0]);
and_block_column_predicate_ptr->add_column_predicate(
single_column_block_predicate);
- if (col_id_to_del_predicates->count(
+ if (del_predicates_for_zone_map->count(
del_cond.column_predicate_vec[0]->column_id())
< 1) {
- col_id_to_del_predicates->insert(
+ del_predicates_for_zone_map->insert(
{del_cond.column_predicate_vec[0]->column_id(),
std::vector<const ColumnPredicate*> {}});
}
-
(*col_id_to_del_predicates)[del_cond.column_predicate_vec[0]->column_id()]
+
(*del_predicates_for_zone_map)[del_cond.column_predicate_vec[0]->column_id()]
.push_back(del_cond.column_predicate_vec[0]);
} else {
auto or_column_predicate = new OrBlockColumnPredicate();
// build or_column_predicate
- std::for_each(
- del_cond.column_predicate_vec.cbegin(),
- del_cond.column_predicate_vec.cend(),
- [&or_column_predicate,
- col_id_to_del_predicates](const ColumnPredicate*
predicate) {
- if
(col_id_to_del_predicates->count(predicate->column_id()) < 1) {
- col_id_to_del_predicates->insert(
- {predicate->column_id(),
- std::vector<const
ColumnPredicate*> {}});
- }
-
(*col_id_to_del_predicates)[predicate->column_id()].push_back(
- predicate);
- or_column_predicate->add_column_predicate(
- new
SingleColumnBlockPredicate(predicate));
- });
+ // when delete from where a = 1 and b = 2, we can not use
del_predicates_for_zone_map to filter zone page,
+ // so here do not put predicate to
del_predicates_for_zone_map,
+ // refer #17145 for more details.
+ // // TODO: need refactor design and code to use more
version delete and more column delete to filter zone page.
+ std::for_each(del_cond.column_predicate_vec.cbegin(),
+ del_cond.column_predicate_vec.cend(),
+ [&or_column_predicate](const
ColumnPredicate* predicate) {
+
or_column_predicate->add_column_predicate(
+ new
SingleColumnBlockPredicate(predicate));
+ });
and_block_column_predicate_ptr->add_column_predicate(or_column_predicate);
}
}
diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h
index 815635ec53..0b33d638e1 100644
--- a/be/src/olap/delete_handler.h
+++ b/be/src/olap/delete_handler.h
@@ -101,7 +101,7 @@ public:
void get_delete_conditions_after_version(
int64_t version, AndBlockColumnPredicate*
and_block_column_predicate_ptr,
std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>*
- col_id_to_del_predicates) const;
+ del_predicates_for_zone_map) const;
private:
// Use regular expression to extract 'column_name', 'op' and 'operands'
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index bb17a1fecf..58fd8f3ef7 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -101,7 +101,7 @@ public:
std::vector<ColumnPredicate*> column_predicates;
std::vector<ColumnPredicate*> column_predicates_except_leafnode_of_andnode;
std::unordered_map<int32_t, std::shared_ptr<AndBlockColumnPredicate>>
col_id_to_predicates;
- std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>
col_id_to_del_predicates;
+ std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>
del_predicates_for_zone_map;
TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE;
// REQUIRED (null is not allowed)
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp
b/be/src/olap/rowset/beta_rowset_reader.cpp
index 01d86965e5..e57bda9fea 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -38,7 +38,7 @@ void BetaRowsetReader::reset_read_options() {
_read_options.delete_condition_predicates =
std::make_shared<AndBlockColumnPredicate>();
_read_options.column_predicates.clear();
_read_options.col_id_to_predicates.clear();
- _read_options.col_id_to_del_predicates.clear();
+ _read_options.del_predicates_for_zone_map.clear();
_read_options.key_ranges.clear();
}
@@ -83,7 +83,7 @@ Status
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
if (read_context->delete_handler != nullptr) {
read_context->delete_handler->get_delete_conditions_after_version(
_rowset->end_version(),
_read_options.delete_condition_predicates.get(),
- &_read_options.col_id_to_del_predicates);
+ &_read_options.del_predicates_for_zone_map);
}
std::vector<uint32_t> read_columns;
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index c0f4d7fa89..ea0e6b44cd 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -129,7 +129,7 @@ Status Segment::new_iterator(const Schema& schema, const
StorageReadOptions& rea
}
RETURN_IF_ERROR(load_index());
- if (read_options.col_id_to_del_predicates.empty() &&
+ if (read_options.delete_condition_predicates->num_of_column_predicate() ==
0 &&
read_options.push_down_agg_type_opt != TPushAggOp::NONE) {
iter->reset(vectorized::new_vstatistics_iterator(this->shared_from_this(),
schema));
} else {
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 73feeb3539..e75a48ac8a 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -380,8 +380,8 @@ Status
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
RETURN_IF_ERROR(_column_iterators[_schema.unique_id(cid)]->get_row_ranges_by_zone_map(
_opts.col_id_to_predicates[cid].get(),
- _opts.col_id_to_del_predicates.count(cid) > 0
- ? &(_opts.col_id_to_del_predicates[cid])
+ _opts.del_predicates_for_zone_map.count(cid) > 0
+ ? &(_opts.del_predicates_for_zone_map[cid])
: nullptr,
&column_row_ranges));
// intersect different columns's row ranges to get final row ranges by
zone map
diff --git a/regression-test/data/delete_p0/test_zone_map_delete.out
b/regression-test/data/delete_p0/test_zone_map_delete.out
index d119d5e8de..ba300220f4 100644
--- a/regression-test/data/delete_p0/test_zone_map_delete.out
+++ b/regression-test/data/delete_p0/test_zone_map_delete.out
@@ -57,3 +57,289 @@
-- !sql --
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+1 11
+1 22
+1 33
+1 44
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+1 11
+1 22
+1 33
+1 44
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+1 11
+1 22
+1 33
+1 44
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+
+-- !sql --
+1 22
+1 33
+1 44
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+1 22
+1 33
+1 44
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+1 33
+1 44
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+1 33
+1 44
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+1 44
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+1 44
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+1 55
+1 66
+1 77
+1 88
+1 99
+1 100
+1 101
+1 102
+1 111
+1 122
+1 133
+1 144
+1 155
+1 166
+1 177
+1 188
+1 199
+1 200
+1 201
+1 202
+
+-- !sql --
+1 201
+1 202
+
+-- !sql --
+1 201
+
+-- !sql --
+1 11
+1 22
+
+-- !sql --
+1 11
+
diff --git a/regression-test/suites/delete_p0/test_zone_map_delete.groovy
b/regression-test/suites/delete_p0/test_zone_map_delete.groovy
index 5e8beb375d..f79830c815 100644
--- a/regression-test/suites/delete_p0/test_zone_map_delete.groovy
+++ b/regression-test/suites/delete_p0/test_zone_map_delete.groovy
@@ -54,4 +54,101 @@ suite("test_zone_map_delete") {
qt_sql """select * from ${tableName} ORDER BY k1;"""
sql """ DROP TABLE IF EXISTS ${tableName} """
+
+ // =========================
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ `k1` bigint(20) NULL,
+ `k2` largeint(40) NULL,
+ `k3` largeint(40) NULL
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`k1`, `k2`, `k3`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "in_memory" = "false",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "true"
+ );
+ """
+
+ sql """ truncate table ${tableName}; """
+
+ sql """ insert into ${tableName}
values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
+
+ sql """ delete from ${tableName} where k2=0;"""
+
+ sql """ delete from ${tableName} where k2=1;"""
+
+ qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;"""
+
+ qt_sql """ select k2,k3 from ${tableName} where k2 = 1 ORDER BY k3;"""
+
+
+ sql """ truncate table ${tableName}; """
+
+ sql """ insert into ${tableName}
values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
+
+ sql """ delete from ${tableName} where k2=1 and k2=0;"""
+
+ qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;"""
+
+ qt_sql """ select k2,k3 from ${tableName} where k2 = 1 ORDER BY k3;"""
+
+
+ sql """ truncate table ${tableName}; """
+
+ sql """ insert into ${tableName}
values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
+
+ sql """ delete from ${tableName} where k2 is null and k3=00;"""
+
+ qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;"""
+
+ qt_sql """ select k2,k3 from ${tableName} where k2 is null ORDER BY k3;"""
+
+ sql """delete from ${tableName} where k2 is not null and k3=11;"""
+
+ qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+ qt_sql """select k2,k3 from ${tableName} where k2 is not null ORDER BY
k3;"""
+
+ sql """delete from ${tableName} where k2=1 and k3=22;"""
+
+ qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+ qt_sql """select k2,k3 from ${tableName} where k2=1 ORDER BY k3;"""
+
+ sql """delete from ${tableName} where k2!=0 and k3=33;"""
+
+ qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+ qt_sql """select k2,k3 from ${tableName} where k2!=0 ORDER BY k3;"""
+
+ sql """delete from ${tableName} where k2 not in (0) and k3=44;"""
+
+ qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+ qt_sql """select k2,k3 from ${tableName} where k2 not in (0) ORDER BY
k3;"""
+
+ sql """delete from ${tableName} where k2=1 and k3 >= 11 and k3 <=200;"""
+
+ qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+ qt_sql """select k2,k3 from ${tableName} where k3 = 201 ORDER BY k3;"""
+
+
+ sql """truncate table ${tableName};"""
+
+ sql """insert into ${tableName}
values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
+
+ sql """delete from ${tableName} where k2=1 and k3 <=202 and k3 >= 33;"""
+
+ qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
+
+ qt_sql """select k2,k3 from ${tableName} where k3 = 11 ORDER BY k3;"""
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]