This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new de4466624d [refactor](schema change)Remove delete from sc (#11441)
de4466624d is described below

commit de4466624da9ef7eb76b3b70f8953c6ea6e89401
Author: yiguolei <[email protected]>
AuthorDate: Wed Aug 3 03:29:41 2022 +0800

    [refactor](schema change)Remove delete from sc (#11441)
    
    * not need call delete handler to filter rows since they are filtered in 
rowset reader
    
    * need not call delete eval in schema change and remove related code
    
    Co-authored-by: yiguolei <[email protected]>
---
 be/src/olap/collect_iterator.cpp                   | 27 +------
 be/src/olap/collect_iterator.h                     |  1 -
 be/src/olap/delete_handler.cpp                     | 13 ----
 be/src/olap/delete_handler.h                       | 16 +---
 be/src/olap/olap_cond.cpp                          | 68 -----------------
 be/src/olap/olap_cond.h                            |  9 ---
 be/src/olap/schema_change.cpp                      | 85 ++++++----------------
 be/test/olap/delete_handler_test.cpp               |  9 ---
 .../test_alter_table_column_with_delete.out        | 12 +++
 .../test_alter_table_column_with_delete.groovy     | 63 ++++++++++++++++
 10 files changed, 100 insertions(+), 203 deletions(-)

diff --git a/be/src/olap/collect_iterator.cpp b/be/src/olap/collect_iterator.cpp
index e9b5127022..dd98303108 100644
--- a/be/src/olap/collect_iterator.cpp
+++ b/be/src/olap/collect_iterator.cpp
@@ -195,7 +195,7 @@ 
CollectIterator::Level0Iterator::Level0Iterator(RowsetReaderSharedPtr rs_reader,
     if (LIKELY(rs_reader->type() == RowsetTypePB::BETA_ROWSET)) {
         _refresh_current_row = &Level0Iterator::_refresh_current_row_v2;
     } else {
-        _refresh_current_row = &Level0Iterator::_refresh_current_row_v1;
+        LOG(FATAL) << "Not supported rowset type";
     }
 }
 
@@ -220,31 +220,6 @@ int64_t CollectIterator::Level0Iterator::version() const {
     return _rs_reader->version().second;
 }
 
-Status CollectIterator::Level0Iterator::_refresh_current_row_v1() {
-    do {
-        if (_row_block != nullptr && _row_block->has_remaining()) {
-            size_t pos = _row_block->pos();
-            _row_block->get_row(pos, &_row_cursor);
-            if (_row_block->block_status() == DEL_PARTIAL_SATISFIED &&
-                _reader->_delete_handler.is_filter_data(version(), 
_row_cursor)) {
-                _reader->_stats.rows_del_filtered++;
-                _row_block->pos_inc();
-                continue;
-            }
-            _current_row = &_row_cursor;
-            return Status::OK();
-        } else {
-            auto res = _rs_reader->next_block(&_row_block);
-            if (!res.ok()) {
-                _current_row = nullptr;
-                return res;
-            }
-        }
-    } while (_row_block != nullptr);
-    _current_row = nullptr;
-    return Status::OLAPInternalError(OLAP_ERR_DATA_EOF);
-}
-
 Status CollectIterator::Level0Iterator::_refresh_current_row_v2() {
     do {
         if (_row_block != nullptr && _row_block->has_remaining()) {
diff --git a/be/src/olap/collect_iterator.h b/be/src/olap/collect_iterator.h
index 1342987707..e765013666 100644
--- a/be/src/olap/collect_iterator.h
+++ b/be/src/olap/collect_iterator.h
@@ -139,7 +139,6 @@ private:
     private:
         Status (Level0Iterator::*_refresh_current_row)() = nullptr;
 
-        Status _refresh_current_row_v1();
         Status _refresh_current_row_v2();
 
         RowsetReaderSharedPtr _rs_reader;
diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index 716c84b504..f3a24731ab 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -310,19 +310,6 @@ Status DeleteHandler::init(const TabletSchema& schema,
     return Status::OK();
 }
 
-bool DeleteHandler::is_filter_data(const int64_t data_version, const 
RowCursor& row) const {
-    // According to semantics, the delete condition stored in _del_conds 
should be an OR relationship,
-    // so as long as the data matches one of the _del_conds, it will return 
true.
-    for (const auto& del_cond : _del_conds) {
-        if (data_version <= del_cond.filter_version &&
-            del_cond.del_cond->delete_conditions_eval(row)) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
 std::vector<int64_t> DeleteHandler::get_conds_version() {
     std::vector<int64_t> conds_version;
     for (const auto& cond : _del_conds) {
diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h
index c04dbadcb5..f3f68eff5d 100644
--- a/be/src/olap/delete_handler.h
+++ b/be/src/olap/delete_handler.h
@@ -46,11 +46,7 @@ struct DeleteConditions {
 //    Status res;
 //    DeleteHandler delete_handler;
 //    res = delete_handler.init(tablet, condition_version);
-// 2. Use it to check whether a row should be deleted:
-//    bool should_be_deleted = delete_handler.is_filter_data(data_version, 
row_cursor);
-// 3. If there are multiple rows, you can invoke function is_filter_data 
multiple times:
-//    should_be_deleted = delete_handler.is_filter_data(data_version, 
row_cursor);
-// 4. After all rows have been checked, you should release this object by 
calling:
+// 2. After all rows have been checked, you should release this object by 
calling:
 //    delete_handler.finalize();
 //
 // NOTE:
@@ -96,16 +92,6 @@ public:
     Status init(const TabletSchema& schema, const 
std::vector<DeletePredicatePB>& delete_conditions,
                 int64_t version, const doris::TabletReader* = nullptr);
 
-    // Check whether a row should be deleted.
-    //
-    // input:
-    //     * data_version: the version of this row
-    //     * row: the row data to be checked
-    // return:
-    //     * true: this row should be deleted
-    //     * false: this row should NOT be deleted
-    bool is_filter_data(const int64_t data_version, const RowCursor& row) 
const;
-
     // Return the delete conditions' size.
     size_t conditions_num() const { return _del_conds.size(); }
 
diff --git a/be/src/olap/olap_cond.cpp b/be/src/olap/olap_cond.cpp
index 3a478a665d..627f846092 100644
--- a/be/src/olap/olap_cond.cpp
+++ b/be/src/olap/olap_cond.cpp
@@ -174,46 +174,6 @@ Status Cond::init(const TCondition& tcond, const 
TabletColumn& column) {
     return Status::OK();
 }
 
-bool Cond::eval(const RowCursorCell& cell) const {
-    if (cell.is_null() && op != OP_IS) {
-        //Any operation other than OP_IS operand and NULL is false
-        return false;
-    }
-
-    switch (op) {
-    case OP_EQ:
-        return operand_field->field()->compare_cell(*operand_field, cell) == 0;
-    case OP_NE:
-        return operand_field->field()->compare_cell(*operand_field, cell) != 0;
-    case OP_LT:
-        return operand_field->field()->compare_cell(*operand_field, cell) > 0;
-    case OP_LE:
-        return operand_field->field()->compare_cell(*operand_field, cell) >= 0;
-    case OP_GT:
-        return operand_field->field()->compare_cell(*operand_field, cell) < 0;
-    case OP_GE:
-        return operand_field->field()->compare_cell(*operand_field, cell) <= 0;
-    case OP_IN: {
-        WrapperField 
wrapperField(const_cast<Field*>(min_value_field->field()), cell);
-        auto ret = operand_set.find(&wrapperField) != operand_set.end();
-        wrapperField.release_field();
-        return ret;
-    }
-    case OP_NOT_IN: {
-        WrapperField 
wrapperField(const_cast<Field*>(min_value_field->field()), cell);
-        auto ret = operand_set.find(&wrapperField) == operand_set.end();
-        wrapperField.release_field();
-        return ret;
-    }
-    case OP_IS: {
-        return operand_field->is_null() == cell.is_null();
-    }
-    default:
-        // Unknown operation type, just return false
-        return false;
-    }
-}
-
 bool Cond::eval(const std::pair<WrapperField*, WrapperField*>& statistic) 
const {
     //A single query condition filtered by a single column
     // When we apply column statistic, Field can be NULL when type is Varchar,
@@ -510,18 +470,6 @@ Status CondColumn::add_cond(const TCondition& tcond, const 
TabletColumn& column)
     return Status::OK();
 }
 
-bool CondColumn::eval(const RowCursor& row) const {
-    auto cell = row.cell(_col_index);
-    for (auto& each_cond : _conds) {
-        // As long as there is one condition not satisfied, we can return false
-        if (!each_cond->eval(cell)) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
 bool CondColumn::eval(const std::pair<WrapperField*, WrapperField*>& 
statistic) const {
     for (auto& each_cond : _conds) {
         // As long as there is one condition not satisfied, we can return false
@@ -613,22 +561,6 @@ Status Conditions::append_condition(const TCondition& 
tcond) {
     return cond_col->add_cond(tcond, column);
 }
 
-bool Conditions::delete_conditions_eval(const RowCursor& row) const {
-    if (_columns.empty()) {
-        return false;
-    }
-
-    for (auto& each_cond : _columns) {
-        if (_cond_column_is_key_or_duplicate(each_cond.second) && 
!each_cond.second->eval(row)) {
-            return false;
-        }
-    }
-
-    VLOG_NOTICE << "Row meets the delete conditions. "
-                << "condition_count=" << _columns.size() << ", row=" << 
row.to_string();
-    return true;
-}
-
 CondColumn* Conditions::get_column(int32_t cid) const {
     auto iter = _columns.find(cid);
     if (iter != _columns.end()) {
diff --git a/be/src/olap/olap_cond.h b/be/src/olap/olap_cond.h
index e8bcb7a977..cdf10cbbc4 100644
--- a/be/src/olap/olap_cond.h
+++ b/be/src/olap/olap_cond.h
@@ -71,7 +71,6 @@ public:
 
     // 用一行数据的指定列同条件进行比较,如果符合过滤条件,
     // 即按照此条件,行应被过滤掉,则返回true,否则返回false
-    bool eval(const RowCursorCell& cell) const;
     bool eval(const KeyRange& statistic) const;
 
     // 通过单列上的单个删除条件对version进行过滤
@@ -104,10 +103,6 @@ public:
 
     Status add_cond(const TCondition& tcond, const TabletColumn& column);
 
-    // 对一行数据中的指定列,用所有过滤条件进行比较,如果所有条件都满足,则过滤此行
-    // Return true means this row should be filtered out, otherwise return 
false
-    bool eval(const RowCursor& row) const;
-
     // Return true if the rowset should be pruned
     bool eval(const std::pair<WrapperField*, WrapperField*>& statistic) const;
 
@@ -171,10 +166,6 @@ public:
     // 2. column类型是double, float
     Status append_condition(const TCondition& condition);
 
-    // 通过所有列上的删除条件对RowCursor进行过滤
-    // Return true means this row should be filtered out, otherwise return 
false
-    bool delete_conditions_eval(const RowCursor& row) const;
-
     const CondColumns& columns() const { return _columns; }
 
     CondColumn* get_column(int32_t cid) const;
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index abc83ff768..743393e555 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -268,33 +268,29 @@ ColumnMapping* 
RowBlockChanger::get_mutable_column_mapping(size_t column_index)
     return &(_schema_mapping[column_index]);
 }
 
-#define TYPE_REINTERPRET_CAST(FromType, ToType)                             \
-    {                                                                       \
-        size_t row_num = ref_block->row_block_info().row_num;               \
-        for (size_t row = 0, mutable_row = 0; row < row_num; ++row) {       \
-            if (is_data_left_vec[row] != 0) {                               \
-                char* ref_ptr = ref_block->field_ptr(row, ref_column);      \
-                char* new_ptr = mutable_block->field_ptr(mutable_row++, i); \
-                *new_ptr = *ref_ptr;                                        \
-                *(ToType*)(new_ptr + 1) = *(FromType*)(ref_ptr + 1);        \
-            }                                                               \
-        }                                                                   \
-        break;                                                              \
-    }
-
-#define LARGEINT_REINTERPRET_CAST(FromType, ToType)                         \
-    {                                                                       \
-        size_t row_num = ref_block->row_block_info().row_num;               \
-        for (size_t row = 0, mutable_row = 0; row < row_num; ++row) {       \
-            if (is_data_left_vec[row] != 0) {                               \
-                char* ref_ptr = ref_block->field_ptr(row, ref_column);      \
-                char* new_ptr = mutable_block->field_ptr(mutable_row++, i); \
-                *new_ptr = *ref_ptr;                                        \
-                ToType new_value = *(FromType*)(ref_ptr + 1);               \
-                memcpy(new_ptr + 1, &new_value, sizeof(ToType));            \
-            }                                                               \
-        }                                                                   \
-        break;                                                              \
+#define TYPE_REINTERPRET_CAST(FromType, ToType)                         \
+    {                                                                   \
+        size_t row_num = ref_block->row_block_info().row_num;           \
+        for (size_t row = 0, mutable_row = 0; row < row_num; ++row) {   \
+            char* ref_ptr = ref_block->field_ptr(row, ref_column);      \
+            char* new_ptr = mutable_block->field_ptr(mutable_row++, i); \
+            *new_ptr = *ref_ptr;                                        \
+            *(ToType*)(new_ptr + 1) = *(FromType*)(ref_ptr + 1);        \
+        }                                                               \
+        break;                                                          \
+    }
+
+#define LARGEINT_REINTERPRET_CAST(FromType, ToType)                     \
+    {                                                                   \
+        size_t row_num = ref_block->row_block_info().row_num;           \
+        for (size_t row = 0, mutable_row = 0; row < row_num; ++row) {   \
+            char* ref_ptr = ref_block->field_ptr(row, ref_column);      \
+            char* new_ptr = mutable_block->field_ptr(mutable_row++, i); \
+            *new_ptr = *ref_ptr;                                        \
+            ToType new_value = *(FromType*)(ref_ptr + 1);               \
+            memcpy(new_ptr + 1, &new_value, sizeof(ToType));            \
+        }                                                               \
+        break;                                                          \
     }
 
 #define CONVERT_FROM_TYPE(from_type)                                           
                 \
@@ -615,27 +611,10 @@ Status RowBlockChanger::change_row_block(const RowBlock* 
ref_block, int32_t data
     // a.1 First determine whether the data needs to be filtered, and finally 
only those marked as 1 are left as needed
     // For those without filter, it is equivalent to leave after setting all 
to 1
     const uint32_t row_num = ref_block->row_block_info().row_num;
-    // (0 means no need to filter out, 1 means yes, during the process 2 means 
that this row needs to be cut and there is no need to compare other columns 
later)
-    std::vector<int8_t> is_data_left_vec(row_num, 1);
-
-    // Compare each row
-    for (size_t row_index = 0; row_index < row_num; ++row_index) {
-        ref_block->get_row(row_index, &read_helper);
-
-        // filter data according to delete conditions specified in DeleteData 
command
-        if (is_data_left_vec[row_index] == 1) {
-            if (_delete_handler != nullptr &&
-                _delete_handler->is_filter_data(data_version, read_helper)) {
-                is_data_left_vec[row_index] = 0;
-                (*filtered_rows)++;
-            }
-        }
-    }
 
     // a.2 Calculate the left row num
     uint32_t new_row_num = row_num - *filtered_rows;
 
-    const bool need_filter_data = (new_row_num != row_num);
     const bool filter_all = (new_row_num == 0);
 
     MemPool* mem_pool = mutable_block->mem_pool();
@@ -662,10 +641,6 @@ Status RowBlockChanger::change_row_block(const RowBlock* 
ref_block, int32_t data
                             << _schema_mapping[i].materialized_function;
                 for (size_t row_index = 0, new_row_index = 0;
                      row_index < ref_block->row_block_info().row_num; 
++row_index) {
-                    // No need row, need to be filter
-                    if (need_filter_data && is_data_left_vec[row_index] == 0) {
-                        continue;
-                    }
                     mutable_block->get_row(new_row_index++, &write_helper);
                     ref_block->get_row(row_index, &read_helper);
 
@@ -686,11 +661,6 @@ Status RowBlockChanger::change_row_block(const RowBlock* 
ref_block, int32_t data
                 // Low efficiency, you can also directly calculate the 
variable length domain copy, but it will still destroy the package
                 for (size_t row_index = 0, new_row_index = 0;
                      row_index < ref_block->row_block_info().row_num; 
++row_index) {
-                    // Unneeded row, skip every time this row is processed
-                    if (need_filter_data && is_data_left_vec[row_index] == 0) {
-                        continue;
-                    }
-
                     // Specify the new row index to be written (different from 
the read row_index)
                     mutable_block->get_row(new_row_index++, &write_helper);
                     ref_block->get_row(row_index, &read_helper);
@@ -720,10 +690,6 @@ Status RowBlockChanger::change_row_block(const RowBlock* 
ref_block, int32_t data
             } else if 
(ConvertTypeResolver::instance()->get_convert_type_info(reftype, newtype)) {
                 for (size_t row_index = 0, new_row_index = 0;
                      row_index < ref_block->row_block_info().row_num; 
++row_index) {
-                    // Skip filtered rows
-                    if (need_filter_data && is_data_left_vec[row_index] == 0) {
-                        continue;
-                    }
                     mutable_block->get_row(new_row_index++, &write_helper);
                     ref_block->get_row(row_index, &read_helper);
                     if (read_helper.is_null(ref_column)) {
@@ -786,11 +752,6 @@ Status RowBlockChanger::change_row_block(const RowBlock* 
ref_block, int32_t data
             // New column, write default value
             for (size_t row_index = 0, new_row_index = 0;
                  row_index < ref_block->row_block_info().row_num; ++row_index) 
{
-                // Unneeded row, skip every time this row is processed
-                if (need_filter_data && is_data_left_vec[row_index] == 0) {
-                    continue;
-                }
-
                 mutable_block->get_row(new_row_index++, &write_helper);
 
                 if (_schema_mapping[i].default_value->is_null()) {
diff --git a/be/test/olap/delete_handler_test.cpp 
b/be/test/olap/delete_handler_test.cpp
index 11571331ea..bc0ca3bce5 100644
--- a/be/test/olap/delete_handler_test.cpp
+++ b/be/test/olap/delete_handler_test.cpp
@@ -961,15 +961,12 @@ TEST_F(TestDeleteHandler, FilterDataSubconditions) {
     OlapTuple tuple1(data_str);
     res = _data_row_cursor.from_tuple(tuple1);
     EXPECT_EQ(Status::OK(), res);
-    EXPECT_TRUE(_delete_handler.is_filter_data(1, _data_row_cursor));
 
     // 构造一行测试数据
     data_str[1] = "4";
     OlapTuple tuple2(data_str);
     res = _data_row_cursor.from_tuple(tuple2);
     EXPECT_EQ(Status::OK(), res);
-    // 不满足子条件:k2!=4
-    EXPECT_FALSE(_delete_handler.is_filter_data(1, _data_row_cursor));
 
     _delete_handler.finalize();
 }
@@ -1048,8 +1045,6 @@ TEST_F(TestDeleteHandler, FilterDataConditions) {
     OlapTuple tuple(data_str);
     res = _data_row_cursor.from_tuple(tuple);
     EXPECT_EQ(Status::OK(), res);
-    // 这行数据会因为过滤条件3而被过滤
-    EXPECT_TRUE(_delete_handler.is_filter_data(3, _data_row_cursor));
 
     _delete_handler.finalize();
 }
@@ -1114,10 +1109,6 @@ TEST_F(TestDeleteHandler, FilterDataVersion) {
     OlapTuple tuple(data_str);
     res = _data_row_cursor.from_tuple(tuple);
     EXPECT_EQ(Status::OK(), res);
-    // 如果数据版本小于3,则过滤条件1生效,这条数据被过滤
-    EXPECT_TRUE(_delete_handler.is_filter_data(2, _data_row_cursor));
-    // 如果数据版本大于3,则过滤条件1会被跳过
-    EXPECT_FALSE(_delete_handler.is_filter_data(4, _data_row_cursor));
 
     _delete_handler.finalize();
 }
diff --git 
a/regression-test/data/schema_change/test_alter_table_column_with_delete.out 
b/regression-test/data/schema_change/test_alter_table_column_with_delete.out
new file mode 100644
index 0000000000..2db3bfd38e
--- /dev/null
+++ b/regression-test/data/schema_change/test_alter_table_column_with_delete.out
@@ -0,0 +1,12 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      1
+3      3
+4      4
+
+-- !sql --
+1      1
+3      3
+4      4
+5      abc
+
diff --git 
a/regression-test/suites/schema_change/test_alter_table_column_with_delete.groovy
 
b/regression-test/suites/schema_change/test_alter_table_column_with_delete.groovy
new file mode 100644
index 0000000000..b26d82bfdf
--- /dev/null
+++ 
b/regression-test/suites/schema_change/test_alter_table_column_with_delete.groovy
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_alter_table_column_with_delete", "schema_change") {
+    def tbName1 = "alter_table_column_dup_with_delete"
+    def getJobState = { tableName ->
+        def jobStateResult = sql """  SHOW ALTER TABLE COLUMN WHERE 
TableName='${tableName}' ORDER BY createtime DESC LIMIT 1 """
+        return jobStateResult[0][9]
+    }
+    sql "DROP TABLE IF EXISTS ${tbName1}"
+    sql """
+            CREATE TABLE IF NOT EXISTS ${tbName1} (
+                k1 INT,
+                value1 INT
+            )
+            UNIQUE KEY (k1)
+            DISTRIBUTED BY HASH(k1) BUCKETS 1 properties("replication_num" = 
"1");
+        """
+
+    sql "insert into ${tbName1} values(1,1);"
+    sql "insert into ${tbName1} values(2,2);"
+    sql "delete from ${tbName1} where k1 = 2;"
+    sql "insert into ${tbName1} values(3,3);"
+    sql "insert into ${tbName1} values(4,4);"
+    qt_sql "select * from ${tbName1};"
+
+
+    sql """
+            ALTER TABLE ${tbName1} 
+            MODIFY COLUMN value1 varchar(22);
+        """
+    int max_try_secs = 120
+    while (max_try_secs--) {
+        String res = getJobState(tbName1)
+        if (res == "FINISHED") {
+            break
+        } else {
+            Thread.sleep(500)
+            if (max_try_secs < 1) {
+                println "test timeout," + "state:" + res
+                assertEquals("FINISHED",res)
+            }
+        }
+    }
+
+    sql "insert into ${tbName1} values(5,'abc');"
+    qt_sql "select * from ${tbName1};"
+    sql "DROP TABLE ${tbName1} FORCE;"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to