This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 42b2725f03 [Bug](delete) Fix wrong delete operation (#13840)
42b2725f03 is described below
commit 42b2725f035ba0a3e7bfc032420715a2f6de351b
Author: Gabriel <[email protected]>
AuthorDate: Tue Nov 1 13:38:43 2022 +0800
[Bug](delete) Fix wrong delete operation (#13840)
---
be/src/olap/column_predicate.h | 4 ++
be/src/olap/comparison_predicate.h | 40 +++++++++++++++
be/src/olap/in_list_predicate.h | 22 ++++++++
be/src/olap/null_predicate.h | 8 +++
be/src/olap/rowset/segment_v2/column_reader.cpp | 6 ++-
.../data/delete_p0/test_zone_map_delete.out | 59 ++++++++++++++++++++++
.../suites/delete_p0/test_zone_map_delete.groovy | 57 +++++++++++++++++++++
7 files changed, 194 insertions(+), 2 deletions(-)
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 9d0e50a055..e20683c205 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -101,6 +101,10 @@ public:
return true;
}
+ virtual bool evaluate_del(const std::pair<WrapperField*, WrapperField*>&
statistic) const {
+ return false;
+ }
+
virtual bool evaluate_and(const BloomFilter* bf) const { return true; }
virtual bool can_do_bloom_filter() const { return false; }
diff --git a/be/src/olap/comparison_predicate.h
b/be/src/olap/comparison_predicate.h
index 6fcb45bd3c..914b1989d3 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -254,6 +254,46 @@ public:
}
}
+ bool evaluate_del(const std::pair<WrapperField*, WrapperField*>&
statistic) const override {
+ if (statistic.first->is_null() || statistic.second->is_null()) {
+ return false;
+ }
+ if constexpr (PT == PredicateType::EQ) {
+ if constexpr (Type == TYPE_DATE) {
+ T tmp_min_uint32_value = 0;
+ memcpy((char*)(&tmp_min_uint32_value),
statistic.first->cell_ptr(),
+ sizeof(uint24_t));
+ T tmp_max_uint32_value = 0;
+ memcpy((char*)(&tmp_max_uint32_value),
statistic.second->cell_ptr(),
+ sizeof(uint24_t));
+ return _operator(tmp_min_uint32_value == _value &&
tmp_max_uint32_value == _value,
+ true);
+ } else {
+ return *reinterpret_cast<const
T*>(statistic.first->cell_ptr()) == _value &&
+ *reinterpret_cast<const
T*>(statistic.second->cell_ptr()) == _value;
+ }
+ } else if constexpr (PT == PredicateType::NE) {
+ if constexpr (Type == TYPE_DATE) {
+ T tmp_min_uint32_value = 0;
+ memcpy((char*)(&tmp_min_uint32_value),
statistic.first->cell_ptr(),
+ sizeof(uint24_t));
+ T tmp_max_uint32_value = 0;
+ memcpy((char*)(&tmp_max_uint32_value),
statistic.second->cell_ptr(),
+ sizeof(uint24_t));
+ return tmp_min_uint32_value > _value || tmp_max_uint32_value <
_value;
+ } else {
+ return *reinterpret_cast<const
T*>(statistic.first->cell_ptr()) > _value ||
+ *reinterpret_cast<const
T*>(statistic.second->cell_ptr()) < _value;
+ }
+ } else if constexpr (PT == PredicateType::LT || PT ==
PredicateType::LE) {
+ COMPARE_TO_MIN_OR_MAX(second)
+ } else {
+ static_assert(PT == PredicateType::GT || PT == PredicateType::GE);
+ COMPARE_TO_MIN_OR_MAX(first)
+ }
+ }
+#undef COMPARE_TO_MIN_OR_MAX
+
bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
if constexpr (PT == PredicateType::EQ) {
if constexpr (std::is_same_v<T, StringValue>) {
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 1535f01e41..33e440a5e6 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -266,6 +266,28 @@ public:
}
}
+ bool evaluate_del(const std::pair<WrapperField*, WrapperField*>&
statistic) const override {
+ if (statistic.first->is_null() || statistic.second->is_null()) {
+ return false;
+ }
+ if constexpr (PT == PredicateType::NOT_IN_LIST) {
+ if constexpr (Type == TYPE_DATE) {
+ T tmp_min_uint32_value = 0;
+ memcpy((char*)(&tmp_min_uint32_value),
statistic.first->cell_ptr(),
+ sizeof(uint24_t));
+ T tmp_max_uint32_value = 0;
+ memcpy((char*)(&tmp_max_uint32_value),
statistic.second->cell_ptr(),
+ sizeof(uint24_t));
+ return tmp_min_uint32_value > _max_value ||
tmp_max_uint32_value < _min_value;
+ } else {
+ return *reinterpret_cast<const
T*>(statistic.first->cell_ptr()) > _max_value ||
+ *reinterpret_cast<const
T*>(statistic.second->cell_ptr()) < _min_value;
+ }
+ } else {
+ return false;
+ }
+ }
+
bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
if constexpr (PT == PredicateType::IN_LIST) {
for (auto value : _values) {
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index 8ca866d212..7279f01259 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -59,6 +59,14 @@ public:
}
}
+ bool evaluate_del(const std::pair<WrapperField*, WrapperField*>&
statistic) const override {
+ if (_is_null) {
+ return statistic.first->is_null() && statistic.second->is_null();
+ } else {
+ return !statistic.first->is_null() && !statistic.second->is_null();
+ }
+ }
+
bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
if (_is_null) {
return bf->test_bytes(nullptr, 0);
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 6ee44e6e0a..96edd618ba 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -274,8 +274,10 @@ Status ColumnReader::_get_filtered_pages(const
AndBlockColumnPredicate* col_pred
bool should_read = true;
if (delete_predicates != nullptr) {
for (auto del_pred : *delete_predicates) {
- if (min_value.get() == nullptr || max_value.get() ==
nullptr ||
- del_pred->evaluate_and({min_value.get(),
max_value.get()})) {
+ // TODO: Both `min_value` and `max_value` should be 0
or neither should be 0.
+ // So nullable only need to judge once.
+ if (min_value.get() != nullptr && max_value.get() !=
nullptr &&
+ del_pred->evaluate_del({min_value.get(),
max_value.get()})) {
should_read = false;
break;
}
diff --git a/regression-test/data/delete_p0/test_zone_map_delete.out
b/regression-test/data/delete_p0/test_zone_map_delete.out
new file mode 100644
index 0000000000..d119d5e8de
--- /dev/null
+++ b/regression-test/data/delete_p0/test_zone_map_delete.out
@@ -0,0 +1,59 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+2 2 2
+2 2 2
+2 2 2
+2 2 2
+2 2 2
+2 2 2
+3 3 3
+3 3 3
+3 3 3
+3 3 3
+3 3 3
+3 3 3
+
+-- !sql --
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+2 2 2
+2 2 2
+2 2 2
+2 2 2
+2 2 2
+2 2 2
+4 4 4
+4 4 4
+4 4 4
+4 4 4
+4 4 4
+4 4 4
+5 5 5
+5 5 5
+5 5 5
+5 5 5
+5 5 5
+5 5 5
+
+-- !sql --
+
+-- !sql --
+3 3 3
+3 3 3
+3 3 3
+3 3 3
+3 3 3
+3 3 3
+
+-- !sql --
+
diff --git a/regression-test/suites/delete_p0/test_zone_map_delete.groovy
b/regression-test/suites/delete_p0/test_zone_map_delete.groovy
new file mode 100644
index 0000000000..5e8beb375d
--- /dev/null
+++ b/regression-test/suites/delete_p0/test_zone_map_delete.groovy
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_zone_map_delete") {
+ def tableName = "test_zone_map_delete_tbl"
+
+ // comparison predicate
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( `k1` int(11) NULL,
`k2` int(11) NULL, `v1` int(11) NULL )DUPLICATE KEY(`k1`,k2) DISTRIBUTED BY
HASH(`k1`) BUCKETS 1 PROPERTIES("replication_num" = "1");"""
+ sql """insert into ${tableName} values(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), (2,2,2),(3,3,3),(4,4,4),(5,5,5);"""
+ sql """delete from ${tableName} where v1 > 3;"""
+ qt_sql """select * from ${tableName} ORDER BY k1;"""
+
+ // in predicate
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( `k1` int(11) NULL,
`k2` int(11) NULL, `v1` int(11) NULL )DUPLICATE KEY(`k1`,k2) DISTRIBUTED BY
HASH(`k1`) BUCKETS 1 PROPERTIES("replication_num" = "1");"""
+ sql """insert into ${tableName} values(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), (2,2,2),(3,3,3),(4,4,4),(5,5,5);"""
+ sql """delete from ${tableName} where v1 in (3);"""
+ qt_sql """select * from ${tableName} ORDER BY k1;"""
+
+ // null predicate
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( `k1` int(11) NULL,
`k2` int(11) NULL, `v1` int(11) NULL )DUPLICATE KEY(`k1`,k2) DISTRIBUTED BY
HASH(`k1`) BUCKETS 1 PROPERTIES("replication_num" = "1");"""
+ sql """insert into ${tableName} values(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), (2,2,2),(3,3,3),(4,4,4),(5,5,5);"""
+ sql """delete from ${tableName} where v1 IS NOT NULL;"""
+ qt_sql """select * from ${tableName} ORDER BY k1;"""
+
+ // not in predicate
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( `k1` int(11) NULL,
`k2` int(11) NULL, `v1` int(11) NULL )DUPLICATE KEY(`k1`,k2) DISTRIBUTED BY
HASH(`k1`) BUCKETS 1 PROPERTIES("replication_num" = "1");"""
+ sql """insert into ${tableName} values(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), (2,2,2),(3,3,3),(4,4,4),(5,5,5);"""
+ sql """delete from ${tableName} where v1 not in (3);"""
+ qt_sql """select * from ${tableName} ORDER BY k1;"""
+
+ // not in predicate
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE IF NOT EXISTS ${tableName} ( `k1` int(11) NULL,
`k2` int(11) NULL, `v1` int(11) NULL )DUPLICATE KEY(`k1`,k2) DISTRIBUTED BY
HASH(`k1`) BUCKETS 1 PROPERTIES("replication_num" = "1");"""
+ sql """insert into ${tableName} values(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1),
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), (2,2,2),(3,3,3),(4,4,4),(5,5,5);"""
+ sql """delete from ${tableName} where v1 not in (0);"""
+ qt_sql """select * from ${tableName} ORDER BY k1;"""
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]