This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new de4466624d [refactor](schema change)Remove delete from sc (#11441)
de4466624d is described below
commit de4466624da9ef7eb76b3b70f8953c6ea6e89401
Author: yiguolei <[email protected]>
AuthorDate: Wed Aug 3 03:29:41 2022 +0800
[refactor](schema change)Remove delete from sc (#11441)
* not need call delete handler to filter rows since they are filtered in
rowset reader
* need not call delete eval in schema change and remove related code
Co-authored-by: yiguolei <[email protected]>
---
be/src/olap/collect_iterator.cpp | 27 +------
be/src/olap/collect_iterator.h | 1 -
be/src/olap/delete_handler.cpp | 13 ----
be/src/olap/delete_handler.h | 16 +---
be/src/olap/olap_cond.cpp | 68 -----------------
be/src/olap/olap_cond.h | 9 ---
be/src/olap/schema_change.cpp | 85 ++++++----------------
be/test/olap/delete_handler_test.cpp | 9 ---
.../test_alter_table_column_with_delete.out | 12 +++
.../test_alter_table_column_with_delete.groovy | 63 ++++++++++++++++
10 files changed, 100 insertions(+), 203 deletions(-)
diff --git a/be/src/olap/collect_iterator.cpp b/be/src/olap/collect_iterator.cpp
index e9b5127022..dd98303108 100644
--- a/be/src/olap/collect_iterator.cpp
+++ b/be/src/olap/collect_iterator.cpp
@@ -195,7 +195,7 @@
CollectIterator::Level0Iterator::Level0Iterator(RowsetReaderSharedPtr rs_reader,
if (LIKELY(rs_reader->type() == RowsetTypePB::BETA_ROWSET)) {
_refresh_current_row = &Level0Iterator::_refresh_current_row_v2;
} else {
- _refresh_current_row = &Level0Iterator::_refresh_current_row_v1;
+ LOG(FATAL) << "Not supported rowset type";
}
}
@@ -220,31 +220,6 @@ int64_t CollectIterator::Level0Iterator::version() const {
return _rs_reader->version().second;
}
-Status CollectIterator::Level0Iterator::_refresh_current_row_v1() {
- do {
- if (_row_block != nullptr && _row_block->has_remaining()) {
- size_t pos = _row_block->pos();
- _row_block->get_row(pos, &_row_cursor);
- if (_row_block->block_status() == DEL_PARTIAL_SATISFIED &&
- _reader->_delete_handler.is_filter_data(version(),
_row_cursor)) {
- _reader->_stats.rows_del_filtered++;
- _row_block->pos_inc();
- continue;
- }
- _current_row = &_row_cursor;
- return Status::OK();
- } else {
- auto res = _rs_reader->next_block(&_row_block);
- if (!res.ok()) {
- _current_row = nullptr;
- return res;
- }
- }
- } while (_row_block != nullptr);
- _current_row = nullptr;
- return Status::OLAPInternalError(OLAP_ERR_DATA_EOF);
-}
-
Status CollectIterator::Level0Iterator::_refresh_current_row_v2() {
do {
if (_row_block != nullptr && _row_block->has_remaining()) {
diff --git a/be/src/olap/collect_iterator.h b/be/src/olap/collect_iterator.h
index 1342987707..e765013666 100644
--- a/be/src/olap/collect_iterator.h
+++ b/be/src/olap/collect_iterator.h
@@ -139,7 +139,6 @@ private:
private:
Status (Level0Iterator::*_refresh_current_row)() = nullptr;
- Status _refresh_current_row_v1();
Status _refresh_current_row_v2();
RowsetReaderSharedPtr _rs_reader;
diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index 716c84b504..f3a24731ab 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -310,19 +310,6 @@ Status DeleteHandler::init(const TabletSchema& schema,
return Status::OK();
}
-bool DeleteHandler::is_filter_data(const int64_t data_version, const
RowCursor& row) const {
- // According to semantics, the delete condition stored in _del_conds
should be an OR relationship,
- // so as long as the data matches one of the _del_conds, it will return
true.
- for (const auto& del_cond : _del_conds) {
- if (data_version <= del_cond.filter_version &&
- del_cond.del_cond->delete_conditions_eval(row)) {
- return true;
- }
- }
-
- return false;
-}
-
std::vector<int64_t> DeleteHandler::get_conds_version() {
std::vector<int64_t> conds_version;
for (const auto& cond : _del_conds) {
diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h
index c04dbadcb5..f3f68eff5d 100644
--- a/be/src/olap/delete_handler.h
+++ b/be/src/olap/delete_handler.h
@@ -46,11 +46,7 @@ struct DeleteConditions {
// Status res;
// DeleteHandler delete_handler;
// res = delete_handler.init(tablet, condition_version);
-// 2. Use it to check whether a row should be deleted:
-// bool should_be_deleted = delete_handler.is_filter_data(data_version,
row_cursor);
-// 3. If there are multiple rows, you can invoke function is_filter_data
multiple times:
-// should_be_deleted = delete_handler.is_filter_data(data_version,
row_cursor);
-// 4. After all rows have been checked, you should release this object by
calling:
+// 2. After all rows have been checked, you should release this object by
calling:
// delete_handler.finalize();
//
// NOTE:
@@ -96,16 +92,6 @@ public:
Status init(const TabletSchema& schema, const
std::vector<DeletePredicatePB>& delete_conditions,
int64_t version, const doris::TabletReader* = nullptr);
- // Check whether a row should be deleted.
- //
- // input:
- // * data_version: the version of this row
- // * row: the row data to be checked
- // return:
- // * true: this row should be deleted
- // * false: this row should NOT be deleted
- bool is_filter_data(const int64_t data_version, const RowCursor& row)
const;
-
// Return the delete conditions' size.
size_t conditions_num() const { return _del_conds.size(); }
diff --git a/be/src/olap/olap_cond.cpp b/be/src/olap/olap_cond.cpp
index 3a478a665d..627f846092 100644
--- a/be/src/olap/olap_cond.cpp
+++ b/be/src/olap/olap_cond.cpp
@@ -174,46 +174,6 @@ Status Cond::init(const TCondition& tcond, const
TabletColumn& column) {
return Status::OK();
}
-bool Cond::eval(const RowCursorCell& cell) const {
- if (cell.is_null() && op != OP_IS) {
- //Any operation other than OP_IS operand and NULL is false
- return false;
- }
-
- switch (op) {
- case OP_EQ:
- return operand_field->field()->compare_cell(*operand_field, cell) == 0;
- case OP_NE:
- return operand_field->field()->compare_cell(*operand_field, cell) != 0;
- case OP_LT:
- return operand_field->field()->compare_cell(*operand_field, cell) > 0;
- case OP_LE:
- return operand_field->field()->compare_cell(*operand_field, cell) >= 0;
- case OP_GT:
- return operand_field->field()->compare_cell(*operand_field, cell) < 0;
- case OP_GE:
- return operand_field->field()->compare_cell(*operand_field, cell) <= 0;
- case OP_IN: {
- WrapperField
wrapperField(const_cast<Field*>(min_value_field->field()), cell);
- auto ret = operand_set.find(&wrapperField) != operand_set.end();
- wrapperField.release_field();
- return ret;
- }
- case OP_NOT_IN: {
- WrapperField
wrapperField(const_cast<Field*>(min_value_field->field()), cell);
- auto ret = operand_set.find(&wrapperField) == operand_set.end();
- wrapperField.release_field();
- return ret;
- }
- case OP_IS: {
- return operand_field->is_null() == cell.is_null();
- }
- default:
- // Unknown operation type, just return false
- return false;
- }
-}
-
bool Cond::eval(const std::pair<WrapperField*, WrapperField*>& statistic)
const {
//A single query condition filtered by a single column
// When we apply column statistic, Field can be NULL when type is Varchar,
@@ -510,18 +470,6 @@ Status CondColumn::add_cond(const TCondition& tcond, const
TabletColumn& column)
return Status::OK();
}
-bool CondColumn::eval(const RowCursor& row) const {
- auto cell = row.cell(_col_index);
- for (auto& each_cond : _conds) {
- // As long as there is one condition not satisfied, we can return false
- if (!each_cond->eval(cell)) {
- return false;
- }
- }
-
- return true;
-}
-
bool CondColumn::eval(const std::pair<WrapperField*, WrapperField*>&
statistic) const {
for (auto& each_cond : _conds) {
// As long as there is one condition not satisfied, we can return false
@@ -613,22 +561,6 @@ Status Conditions::append_condition(const TCondition&
tcond) {
return cond_col->add_cond(tcond, column);
}
-bool Conditions::delete_conditions_eval(const RowCursor& row) const {
- if (_columns.empty()) {
- return false;
- }
-
- for (auto& each_cond : _columns) {
- if (_cond_column_is_key_or_duplicate(each_cond.second) &&
!each_cond.second->eval(row)) {
- return false;
- }
- }
-
- VLOG_NOTICE << "Row meets the delete conditions. "
- << "condition_count=" << _columns.size() << ", row=" <<
row.to_string();
- return true;
-}
-
CondColumn* Conditions::get_column(int32_t cid) const {
auto iter = _columns.find(cid);
if (iter != _columns.end()) {
diff --git a/be/src/olap/olap_cond.h b/be/src/olap/olap_cond.h
index e8bcb7a977..cdf10cbbc4 100644
--- a/be/src/olap/olap_cond.h
+++ b/be/src/olap/olap_cond.h
@@ -71,7 +71,6 @@ public:
// 用一行数据的指定列同条件进行比较,如果符合过滤条件,
// 即按照此条件,行应被过滤掉,则返回true,否则返回false
- bool eval(const RowCursorCell& cell) const;
bool eval(const KeyRange& statistic) const;
// 通过单列上的单个删除条件对version进行过滤
@@ -104,10 +103,6 @@ public:
Status add_cond(const TCondition& tcond, const TabletColumn& column);
- // 对一行数据中的指定列,用所有过滤条件进行比较,如果所有条件都满足,则过滤此行
- // Return true means this row should be filtered out, otherwise return
false
- bool eval(const RowCursor& row) const;
-
// Return true if the rowset should be pruned
bool eval(const std::pair<WrapperField*, WrapperField*>& statistic) const;
@@ -171,10 +166,6 @@ public:
// 2. column类型是double, float
Status append_condition(const TCondition& condition);
- // 通过所有列上的删除条件对RowCursor进行过滤
- // Return true means this row should be filtered out, otherwise return
false
- bool delete_conditions_eval(const RowCursor& row) const;
-
const CondColumns& columns() const { return _columns; }
CondColumn* get_column(int32_t cid) const;
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index abc83ff768..743393e555 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -268,33 +268,29 @@ ColumnMapping*
RowBlockChanger::get_mutable_column_mapping(size_t column_index)
return &(_schema_mapping[column_index]);
}
-#define TYPE_REINTERPRET_CAST(FromType, ToType) \
- { \
- size_t row_num = ref_block->row_block_info().row_num; \
- for (size_t row = 0, mutable_row = 0; row < row_num; ++row) { \
- if (is_data_left_vec[row] != 0) { \
- char* ref_ptr = ref_block->field_ptr(row, ref_column); \
- char* new_ptr = mutable_block->field_ptr(mutable_row++, i); \
- *new_ptr = *ref_ptr; \
- *(ToType*)(new_ptr + 1) = *(FromType*)(ref_ptr + 1); \
- } \
- } \
- break; \
- }
-
-#define LARGEINT_REINTERPRET_CAST(FromType, ToType) \
- { \
- size_t row_num = ref_block->row_block_info().row_num; \
- for (size_t row = 0, mutable_row = 0; row < row_num; ++row) { \
- if (is_data_left_vec[row] != 0) { \
- char* ref_ptr = ref_block->field_ptr(row, ref_column); \
- char* new_ptr = mutable_block->field_ptr(mutable_row++, i); \
- *new_ptr = *ref_ptr; \
- ToType new_value = *(FromType*)(ref_ptr + 1); \
- memcpy(new_ptr + 1, &new_value, sizeof(ToType)); \
- } \
- } \
- break; \
+#define TYPE_REINTERPRET_CAST(FromType, ToType) \
+ { \
+ size_t row_num = ref_block->row_block_info().row_num; \
+ for (size_t row = 0, mutable_row = 0; row < row_num; ++row) { \
+ char* ref_ptr = ref_block->field_ptr(row, ref_column); \
+ char* new_ptr = mutable_block->field_ptr(mutable_row++, i); \
+ *new_ptr = *ref_ptr; \
+ *(ToType*)(new_ptr + 1) = *(FromType*)(ref_ptr + 1); \
+ } \
+ break; \
+ }
+
+#define LARGEINT_REINTERPRET_CAST(FromType, ToType) \
+ { \
+ size_t row_num = ref_block->row_block_info().row_num; \
+ for (size_t row = 0, mutable_row = 0; row < row_num; ++row) { \
+ char* ref_ptr = ref_block->field_ptr(row, ref_column); \
+ char* new_ptr = mutable_block->field_ptr(mutable_row++, i); \
+ *new_ptr = *ref_ptr; \
+ ToType new_value = *(FromType*)(ref_ptr + 1); \
+ memcpy(new_ptr + 1, &new_value, sizeof(ToType)); \
+ } \
+ break; \
}
#define CONVERT_FROM_TYPE(from_type)
\
@@ -615,27 +611,10 @@ Status RowBlockChanger::change_row_block(const RowBlock*
ref_block, int32_t data
// a.1 First determine whether the data needs to be filtered, and finally
only those marked as 1 are left as needed
// For those without filter, it is equivalent to leave after setting all
to 1
const uint32_t row_num = ref_block->row_block_info().row_num;
- // (0 means no need to filter out, 1 means yes, during the process 2 means
that this row needs to be cut and there is no need to compare other columns
later)
- std::vector<int8_t> is_data_left_vec(row_num, 1);
-
- // Compare each row
- for (size_t row_index = 0; row_index < row_num; ++row_index) {
- ref_block->get_row(row_index, &read_helper);
-
- // filter data according to delete conditions specified in DeleteData
command
- if (is_data_left_vec[row_index] == 1) {
- if (_delete_handler != nullptr &&
- _delete_handler->is_filter_data(data_version, read_helper)) {
- is_data_left_vec[row_index] = 0;
- (*filtered_rows)++;
- }
- }
- }
// a.2 Calculate the left row num
uint32_t new_row_num = row_num - *filtered_rows;
- const bool need_filter_data = (new_row_num != row_num);
const bool filter_all = (new_row_num == 0);
MemPool* mem_pool = mutable_block->mem_pool();
@@ -662,10 +641,6 @@ Status RowBlockChanger::change_row_block(const RowBlock*
ref_block, int32_t data
<< _schema_mapping[i].materialized_function;
for (size_t row_index = 0, new_row_index = 0;
row_index < ref_block->row_block_info().row_num;
++row_index) {
- // No need row, need to be filter
- if (need_filter_data && is_data_left_vec[row_index] == 0) {
- continue;
- }
mutable_block->get_row(new_row_index++, &write_helper);
ref_block->get_row(row_index, &read_helper);
@@ -686,11 +661,6 @@ Status RowBlockChanger::change_row_block(const RowBlock*
ref_block, int32_t data
// Low efficiency, you can also directly calculate the
variable length domain copy, but it will still destroy the package
for (size_t row_index = 0, new_row_index = 0;
row_index < ref_block->row_block_info().row_num;
++row_index) {
- // Unneeded row, skip every time this row is processed
- if (need_filter_data && is_data_left_vec[row_index] == 0) {
- continue;
- }
-
// Specify the new row index to be written (different from
the read row_index)
mutable_block->get_row(new_row_index++, &write_helper);
ref_block->get_row(row_index, &read_helper);
@@ -720,10 +690,6 @@ Status RowBlockChanger::change_row_block(const RowBlock*
ref_block, int32_t data
} else if
(ConvertTypeResolver::instance()->get_convert_type_info(reftype, newtype)) {
for (size_t row_index = 0, new_row_index = 0;
row_index < ref_block->row_block_info().row_num;
++row_index) {
- // Skip filtered rows
- if (need_filter_data && is_data_left_vec[row_index] == 0) {
- continue;
- }
mutable_block->get_row(new_row_index++, &write_helper);
ref_block->get_row(row_index, &read_helper);
if (read_helper.is_null(ref_column)) {
@@ -786,11 +752,6 @@ Status RowBlockChanger::change_row_block(const RowBlock*
ref_block, int32_t data
// New column, write default value
for (size_t row_index = 0, new_row_index = 0;
row_index < ref_block->row_block_info().row_num; ++row_index)
{
- // Unneeded row, skip every time this row is processed
- if (need_filter_data && is_data_left_vec[row_index] == 0) {
- continue;
- }
-
mutable_block->get_row(new_row_index++, &write_helper);
if (_schema_mapping[i].default_value->is_null()) {
diff --git a/be/test/olap/delete_handler_test.cpp
b/be/test/olap/delete_handler_test.cpp
index 11571331ea..bc0ca3bce5 100644
--- a/be/test/olap/delete_handler_test.cpp
+++ b/be/test/olap/delete_handler_test.cpp
@@ -961,15 +961,12 @@ TEST_F(TestDeleteHandler, FilterDataSubconditions) {
OlapTuple tuple1(data_str);
res = _data_row_cursor.from_tuple(tuple1);
EXPECT_EQ(Status::OK(), res);
- EXPECT_TRUE(_delete_handler.is_filter_data(1, _data_row_cursor));
// 构造一行测试数据
data_str[1] = "4";
OlapTuple tuple2(data_str);
res = _data_row_cursor.from_tuple(tuple2);
EXPECT_EQ(Status::OK(), res);
- // 不满足子条件:k2!=4
- EXPECT_FALSE(_delete_handler.is_filter_data(1, _data_row_cursor));
_delete_handler.finalize();
}
@@ -1048,8 +1045,6 @@ TEST_F(TestDeleteHandler, FilterDataConditions) {
OlapTuple tuple(data_str);
res = _data_row_cursor.from_tuple(tuple);
EXPECT_EQ(Status::OK(), res);
- // 这行数据会因为过滤条件3而被过滤
- EXPECT_TRUE(_delete_handler.is_filter_data(3, _data_row_cursor));
_delete_handler.finalize();
}
@@ -1114,10 +1109,6 @@ TEST_F(TestDeleteHandler, FilterDataVersion) {
OlapTuple tuple(data_str);
res = _data_row_cursor.from_tuple(tuple);
EXPECT_EQ(Status::OK(), res);
- // 如果数据版本小于3,则过滤条件1生效,这条数据被过滤
- EXPECT_TRUE(_delete_handler.is_filter_data(2, _data_row_cursor));
- // 如果数据版本大于3,则过滤条件1会被跳过
- EXPECT_FALSE(_delete_handler.is_filter_data(4, _data_row_cursor));
_delete_handler.finalize();
}
diff --git
a/regression-test/data/schema_change/test_alter_table_column_with_delete.out
b/regression-test/data/schema_change/test_alter_table_column_with_delete.out
new file mode 100644
index 0000000000..2db3bfd38e
--- /dev/null
+++ b/regression-test/data/schema_change/test_alter_table_column_with_delete.out
@@ -0,0 +1,12 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+1 1
+3 3
+4 4
+
+-- !sql --
+1 1
+3 3
+4 4
+5 abc
+
diff --git
a/regression-test/suites/schema_change/test_alter_table_column_with_delete.groovy
b/regression-test/suites/schema_change/test_alter_table_column_with_delete.groovy
new file mode 100644
index 0000000000..b26d82bfdf
--- /dev/null
+++
b/regression-test/suites/schema_change/test_alter_table_column_with_delete.groovy
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_alter_table_column_with_delete", "schema_change") {
+ def tbName1 = "alter_table_column_dup_with_delete"
+ def getJobState = { tableName ->
+ def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE
TableName='${tableName}' ORDER BY createtime DESC LIMIT 1 """
+ return jobStateResult[0][9]
+ }
+ sql "DROP TABLE IF EXISTS ${tbName1}"
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tbName1} (
+ k1 INT,
+ value1 INT
+ )
+ UNIQUE KEY (k1)
+ DISTRIBUTED BY HASH(k1) BUCKETS 1 properties("replication_num" =
"1");
+ """
+
+ sql "insert into ${tbName1} values(1,1);"
+ sql "insert into ${tbName1} values(2,2);"
+ sql "delete from ${tbName1} where k1 = 2;"
+ sql "insert into ${tbName1} values(3,3);"
+ sql "insert into ${tbName1} values(4,4);"
+ qt_sql "select * from ${tbName1};"
+
+
+ sql """
+ ALTER TABLE ${tbName1}
+ MODIFY COLUMN value1 varchar(22);
+ """
+ int max_try_secs = 120
+ while (max_try_secs--) {
+ String res = getJobState(tbName1)
+ if (res == "FINISHED") {
+ break
+ } else {
+ Thread.sleep(500)
+ if (max_try_secs < 1) {
+ println "test timeout," + "state:" + res
+ assertEquals("FINISHED",res)
+ }
+ }
+ }
+
+ sql "insert into ${tbName1} values(5,'abc');"
+ qt_sql "select * from ${tbName1};"
+ sql "DROP TABLE ${tbName1} FORCE;"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]