[doris] branch master updated: [Bug](delete) Fix wrong delete operation (#13840)

gabriellee Mon, 31 Oct 2022 22:38:56 -0700

This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 42b2725f03 [Bug](delete) Fix wrong delete operation (#13840)
42b2725f03 is described below

commit 42b2725f035ba0a3e7bfc032420715a2f6de351b
Author: Gabriel <[email protected]>
AuthorDate: Tue Nov 1 13:38:43 2022 +0800

    [Bug](delete) Fix wrong delete operation (#13840)
---
 be/src/olap/column_predicate.h                     |  4 ++
 be/src/olap/comparison_predicate.h                 | 40 +++++++++++++++
 be/src/olap/in_list_predicate.h                    | 22 ++++++++
 be/src/olap/null_predicate.h                       |  8 +++
 be/src/olap/rowset/segment_v2/column_reader.cpp    |  6 ++-
 .../data/delete_p0/test_zone_map_delete.out        | 59 ++++++++++++++++++++++
 .../suites/delete_p0/test_zone_map_delete.groovy   | 57 +++++++++++++++++++++
 7 files changed, 194 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 9d0e50a055..e20683c205 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -101,6 +101,10 @@ public:
         return true;
     }
 
+    virtual bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& 
statistic) const {
+        return false;
+    }
+
     virtual bool evaluate_and(const BloomFilter* bf) const { return true; }
 
     virtual bool can_do_bloom_filter() const { return false; }
diff --git a/be/src/olap/comparison_predicate.h 
b/be/src/olap/comparison_predicate.h
index 6fcb45bd3c..914b1989d3 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -254,6 +254,46 @@ public:
         }
     }
 
+    bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& 
statistic) const override {
+        if (statistic.first->is_null() || statistic.second->is_null()) {
+            return false;
+        }
+        if constexpr (PT == PredicateType::EQ) {
+            if constexpr (Type == TYPE_DATE) {
+                T tmp_min_uint32_value = 0;
+                memcpy((char*)(&tmp_min_uint32_value), 
statistic.first->cell_ptr(),
+                       sizeof(uint24_t));
+                T tmp_max_uint32_value = 0;
+                memcpy((char*)(&tmp_max_uint32_value), 
statistic.second->cell_ptr(),
+                       sizeof(uint24_t));
+                return _operator(tmp_min_uint32_value == _value && 
tmp_max_uint32_value == _value,
+                                 true);
+            } else {
+                return *reinterpret_cast<const 
T*>(statistic.first->cell_ptr()) == _value &&
+                       *reinterpret_cast<const 
T*>(statistic.second->cell_ptr()) == _value;
+            }
+        } else if constexpr (PT == PredicateType::NE) {
+            if constexpr (Type == TYPE_DATE) {
+                T tmp_min_uint32_value = 0;
+                memcpy((char*)(&tmp_min_uint32_value), 
statistic.first->cell_ptr(),
+                       sizeof(uint24_t));
+                T tmp_max_uint32_value = 0;
+                memcpy((char*)(&tmp_max_uint32_value), 
statistic.second->cell_ptr(),
+                       sizeof(uint24_t));
+                return tmp_min_uint32_value > _value || tmp_max_uint32_value < 
_value;
+            } else {
+                return *reinterpret_cast<const 
T*>(statistic.first->cell_ptr()) > _value ||
+                       *reinterpret_cast<const 
T*>(statistic.second->cell_ptr()) < _value;
+            }
+        } else if constexpr (PT == PredicateType::LT || PT == 
PredicateType::LE) {
+            COMPARE_TO_MIN_OR_MAX(second)
+        } else {
+            static_assert(PT == PredicateType::GT || PT == PredicateType::GE);
+            COMPARE_TO_MIN_OR_MAX(first)
+        }
+    }
+#undef COMPARE_TO_MIN_OR_MAX
+
     bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
         if constexpr (PT == PredicateType::EQ) {
             if constexpr (std::is_same_v<T, StringValue>) {
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 1535f01e41..33e440a5e6 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -266,6 +266,28 @@ public:
         }
     }
 
+    bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& 
statistic) const override {
+        if (statistic.first->is_null() || statistic.second->is_null()) {
+            return false;
+        }
+        if constexpr (PT == PredicateType::NOT_IN_LIST) {
+            if constexpr (Type == TYPE_DATE) {
+                T tmp_min_uint32_value = 0;
+                memcpy((char*)(&tmp_min_uint32_value), 
statistic.first->cell_ptr(),
+                       sizeof(uint24_t));
+                T tmp_max_uint32_value = 0;
+                memcpy((char*)(&tmp_max_uint32_value), 
statistic.second->cell_ptr(),
+                       sizeof(uint24_t));
+                return tmp_min_uint32_value > _max_value || 
tmp_max_uint32_value < _min_value;
+            } else {
+                return *reinterpret_cast<const 
T*>(statistic.first->cell_ptr()) > _max_value ||
+                       *reinterpret_cast<const 
T*>(statistic.second->cell_ptr()) < _min_value;
+            }
+        } else {
+            return false;
+        }
+    }
+
     bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
         if constexpr (PT == PredicateType::IN_LIST) {
             for (auto value : _values) {
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index 8ca866d212..7279f01259 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -59,6 +59,14 @@ public:
         }
     }
 
+    bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& 
statistic) const override {
+        if (_is_null) {
+            return statistic.first->is_null() && statistic.second->is_null();
+        } else {
+            return !statistic.first->is_null() && !statistic.second->is_null();
+        }
+    }
+
     bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
         if (_is_null) {
             return bf->test_bytes(nullptr, 0);
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 6ee44e6e0a..96edd618ba 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -274,8 +274,10 @@ Status ColumnReader::_get_filtered_pages(const 
AndBlockColumnPredicate* col_pred
                 bool should_read = true;
                 if (delete_predicates != nullptr) {
                     for (auto del_pred : *delete_predicates) {
-                        if (min_value.get() == nullptr || max_value.get() == 
nullptr ||
-                            del_pred->evaluate_and({min_value.get(), 
max_value.get()})) {
+                        // TODO: `min_value` and `max_value` should either both be 
nullptr or both be non-null,
+                        //  so the null check only needs to be done once.
+                        if (min_value.get() != nullptr && max_value.get() != 
nullptr &&
+                            del_pred->evaluate_del({min_value.get(), 
max_value.get()})) {
                             should_read = false;
                             break;
                         }
diff --git a/regression-test/data/delete_p0/test_zone_map_delete.out 
b/regression-test/data/delete_p0/test_zone_map_delete.out
new file mode 100644
index 0000000000..d119d5e8de
--- /dev/null
+++ b/regression-test/data/delete_p0/test_zone_map_delete.out
@@ -0,0 +1,59 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      1       1
+1      1       1
+1      1       1
+1      1       1
+1      1       1
+1      1       1
+2      2       2
+2      2       2
+2      2       2
+2      2       2
+2      2       2
+2      2       2
+3      3       3
+3      3       3
+3      3       3
+3      3       3
+3      3       3
+3      3       3
+
+-- !sql --
+1      1       1
+1      1       1
+1      1       1
+1      1       1
+1      1       1
+1      1       1
+2      2       2
+2      2       2
+2      2       2
+2      2       2
+2      2       2
+2      2       2
+4      4       4
+4      4       4
+4      4       4
+4      4       4
+4      4       4
+4      4       4
+5      5       5
+5      5       5
+5      5       5
+5      5       5
+5      5       5
+5      5       5
+
+-- !sql --
+
+-- !sql --
+3      3       3
+3      3       3
+3      3       3
+3      3       3
+3      3       3
+3      3       3
+
+-- !sql --
+
diff --git a/regression-test/suites/delete_p0/test_zone_map_delete.groovy 
b/regression-test/suites/delete_p0/test_zone_map_delete.groovy
new file mode 100644
index 0000000000..5e8beb375d
--- /dev/null
+++ b/regression-test/suites/delete_p0/test_zone_map_delete.groovy
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_zone_map_delete") {
+    def tableName = "test_zone_map_delete_tbl"
+
+    // comparison predicate
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """ CREATE TABLE IF NOT EXISTS ${tableName} (   `k1` int(11) NULL,   
`k2` int(11) NULL,   `v1` int(11) NULL )DUPLICATE KEY(`k1`,k2) DISTRIBUTED BY 
HASH(`k1`) BUCKETS 1 PROPERTIES("replication_num" = "1");"""
+    sql """insert into ${tableName} values(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), (2,2,2),(3,3,3),(4,4,4),(5,5,5);"""
+    sql """delete from ${tableName} where v1 > 3;"""
+    qt_sql """select * from ${tableName} ORDER BY k1;"""
+
+    // in predicate
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """ CREATE TABLE IF NOT EXISTS ${tableName} (   `k1` int(11) NULL,   
`k2` int(11) NULL,   `v1` int(11) NULL )DUPLICATE KEY(`k1`,k2) DISTRIBUTED BY 
HASH(`k1`) BUCKETS 1 PROPERTIES("replication_num" = "1");"""
+    sql """insert into ${tableName} values(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), (2,2,2),(3,3,3),(4,4,4),(5,5,5);"""
+    sql """delete from ${tableName} where v1 in (3);"""
+    qt_sql """select * from ${tableName} ORDER BY k1;"""
+
+    // null predicate
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """ CREATE TABLE IF NOT EXISTS ${tableName} (   `k1` int(11) NULL,   
`k2` int(11) NULL,   `v1` int(11) NULL )DUPLICATE KEY(`k1`,k2) DISTRIBUTED BY 
HASH(`k1`) BUCKETS 1 PROPERTIES("replication_num" = "1");"""
+    sql """insert into ${tableName} values(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), (2,2,2),(3,3,3),(4,4,4),(5,5,5);"""
+    sql """delete from ${tableName} where v1 IS NOT NULL;"""
+    qt_sql """select * from ${tableName} ORDER BY k1;"""
+
+    // not in predicate (value present in the inserted data)
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """ CREATE TABLE IF NOT EXISTS ${tableName} (   `k1` int(11) NULL,   
`k2` int(11) NULL,   `v1` int(11) NULL )DUPLICATE KEY(`k1`,k2) DISTRIBUTED BY 
HASH(`k1`) BUCKETS 1 PROPERTIES("replication_num" = "1");"""
+    sql """insert into ${tableName} values(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), (2,2,2),(3,3,3),(4,4,4),(5,5,5);"""
+    sql """delete from ${tableName} where v1 not in (3);"""
+    qt_sql """select * from ${tableName} ORDER BY k1;"""
+
+    // not in predicate (value absent from the inserted data)
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """ CREATE TABLE IF NOT EXISTS ${tableName} (   `k1` int(11) NULL,   
`k2` int(11) NULL,   `v1` int(11) NULL )DUPLICATE KEY(`k1`,k2) DISTRIBUTED BY 
HASH(`k1`) BUCKETS 1 PROPERTIES("replication_num" = "1");"""
+    sql """insert into ${tableName} values(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), 
(2,2,2),(3,3,3),(4,4,4),(5,5,5),(1,1,1), (2,2,2),(3,3,3),(4,4,4),(5,5,5);"""
+    sql """delete from ${tableName} where v1 not in (0);"""
+    qt_sql """select * from ${tableName} ORDER BY k1;"""
+
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[doris] branch master updated: [Bug](delete) Fix wrong delete operation (#13840)

Reply via email to