This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0f75bd0e38 [fix](delete) fix query result error after delete (#11754)
0f75bd0e38 is described below
commit 0f75bd0e385ad582747cd027b683d145ecaf9a38
Author: luozenglin <[email protected]>
AuthorDate: Mon Aug 15 17:52:03 2022 +0800
[fix](delete) fix query result error after delete (#11754)
convert dictionary code for delete predicates.
---
be/src/olap/block_column_predicate.h | 13 ++++++++
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 39 ++++++++++++++++++++++
be/src/olap/rowset/segment_v2/segment_iterator.h | 24 +++----------
.../delete_p0/test_segment_iterator_delete.out | 4 +++
.../delete_p0/test_segment_iterator_delete.groovy | 18 ++++++++++
5 files changed, 78 insertions(+), 20 deletions(-)
diff --git a/be/src/olap/block_column_predicate.h b/be/src/olap/block_column_predicate.h
index 219a92c3aa..c2830c223b 100644
--- a/be/src/olap/block_column_predicate.h
+++ b/be/src/olap/block_column_predicate.h
@@ -43,6 +43,9 @@ public:
virtual void get_all_column_ids(std::set<ColumnId>& column_id_set) const = 0;
+ virtual void get_all_column_predicate(
+ std::set<const ColumnPredicate*>& predicate_set) const = 0;
+
virtual uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
uint16_t selected_size) const {
return selected_size;
@@ -68,6 +71,10 @@ public:
column_id_set.insert(_predicate->column_id());
};
+ void get_all_column_predicate(std::set<const ColumnPredicate*>& predicate_set) const override {
+ predicate_set.insert(_predicate);
+ }
+
uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel,
uint16_t selected_size) const override;
void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel,
uint16_t selected_size,
@@ -103,6 +110,12 @@ public:
}
};
+ void get_all_column_predicate(std::set<const ColumnPredicate*>& predicate_set) const override {
+ for (auto child_block_predicate : _block_column_predicate_vec) {
+ child_block_predicate->get_all_column_predicate(predicate_set);
+ }
+ }
+
protected:
std::vector<const BlockColumnPredicate*> _block_column_predicate_vec;
};
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index cc728bdbe7..b246352b97 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -751,6 +751,16 @@ void SegmentIterator::_vec_init_lazy_materialization() {
std::set<ColumnId> del_cond_id_set;
_opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set);
+ std::set<const ColumnPredicate*> delete_predicate_set {};
+ _opts.delete_condition_predicates->get_all_column_predicate(delete_predicate_set);
+ for (const auto predicate : delete_predicate_set) {
+ if (PredicateTypeTraits::is_range(predicate->type())) {
+ _delete_range_column_ids.push_back(predicate->column_id());
+ } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
+ _delete_bloom_filter_column_ids.push_back(predicate->column_id());
+ }
+ }
+
if (!_col_predicates.empty() || !del_cond_id_set.empty()) {
std::set<ColumnId> short_cir_pred_col_id_set; // using set for distinct cid
std::set<ColumnId> vec_pred_col_id_set;
@@ -1199,6 +1209,35 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
return Status::OK();
}
+void SegmentIterator::_convert_dict_code_for_predicate_if_necessary() {
+ for (auto predicate : _short_cir_eval_predicate) {
+ _convert_dict_code_for_predicate_if_necessary_impl(predicate);
+ }
+
+ for (auto predicate : _pre_eval_block_predicate) {
+ _convert_dict_code_for_predicate_if_necessary_impl(predicate);
+ }
+
+ for (auto column_id : _delete_range_column_ids) {
+ _current_return_columns[column_id].get()->convert_dict_codes_if_necessary();
+ }
+
+ for (auto column_id : _delete_bloom_filter_column_ids) {
+ _current_return_columns[column_id].get()->generate_hash_values_for_runtime_filter();
+ }
+}
+
+void SegmentIterator::_convert_dict_code_for_predicate_if_necessary_impl(
+ ColumnPredicate* predicate) {
+ auto& column = _current_return_columns[predicate->column_id()];
+ auto* col_ptr = column.get();
+ if (PredicateTypeTraits::is_range(predicate->type())) {
+ col_ptr->convert_dict_codes_if_necessary();
+ } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
+ col_ptr->generate_hash_values_for_runtime_filter();
+ }
+}
+
void SegmentIterator::_update_max_row(const vectorized::Block* block) {
_estimate_row_size = false;
auto avg_row_size = block->bytes() / block->rows();
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index d57889ccbe..66e45db1cc 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -134,27 +134,9 @@ private:
bool _can_evaluated_by_vectorized(ColumnPredicate* predicate);
// Dictionary column should do something to initial.
- void _convert_dict_code_for_predicate_if_necessary() {
- for (auto predicate : _short_cir_eval_predicate) {
- auto& column = _current_return_columns[predicate->column_id()];
- auto* col_ptr = column.get();
- if (PredicateTypeTraits::is_range(predicate->type())) {
- col_ptr->convert_dict_codes_if_necessary();
- } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
- col_ptr->generate_hash_values_for_runtime_filter();
- }
- }
+ void _convert_dict_code_for_predicate_if_necessary();
- for (auto predicate : _pre_eval_block_predicate) {
- auto& column = _current_return_columns[predicate->column_id()];
- auto* col_ptr = column.get();
- if (PredicateTypeTraits::is_range(predicate->type())) {
- col_ptr->convert_dict_codes_if_necessary();
- } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
- col_ptr->generate_hash_values_for_runtime_filter();
- }
- }
- }
+ void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* predicate);
void _update_max_row(const vectorized::Block* block);
@@ -198,6 +180,8 @@ private:
vectorized::MutableColumns _current_return_columns;
std::vector<ColumnPredicate*> _pre_eval_block_predicate;
std::vector<ColumnPredicate*> _short_cir_eval_predicate;
+ std::vector<uint32_t> _delete_range_column_ids;
+ std::vector<uint32_t> _delete_bloom_filter_column_ids;
// when lazy materialization is enable, segmentIter need to read data at least twice
// first, read predicate columns by various index
// second, read non-predicate columns
diff --git a/regression-test/data/delete_p0/test_segment_iterator_delete.out b/regression-test/data/delete_p0/test_segment_iterator_delete.out
index 41a422e4f4..9dd338b435 100644
--- a/regression-test/data/delete_p0/test_segment_iterator_delete.out
+++ b/regression-test/data/delete_p0/test_segment_iterator_delete.out
@@ -77,3 +77,7 @@
4 4 4
6 6 6
+-- !sql --
+1 SSS sk6S0
+1 ttt zdges
+
diff --git a/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy b/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy
index 177299a447..1a357bbff4 100644
--- a/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy
+++ b/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy
@@ -17,12 +17,25 @@
suite("test_segment_iterator_delete") {
def tableName = "delete_regression_test_segment_iterator"
+ def tableName_dict = "delete_regression_test_segment_iterator_column_dictionary"
// test duplicate key
sql """ DROP TABLE IF EXISTS ${tableName} """
sql """ CREATE TABLE ${tableName} (c1 int NOT NULL, c2 int NOT NULL , c3
int not null ) ENGINE=OLAP DUPLICATE KEY(c1, c2) COMMENT "OLAP" DISTRIBUTED BY
HASH(c3) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );"""
+ sql """ DROP TABLE IF EXISTS ${tableName_dict} """
+ sql """
+ CREATE TABLE ${tableName_dict} (
+ `tinyint_key` tinyint(4) NOT NULL,
+ `char_50_key` char(50) NOT NULL,
+ `character_key` varchar(500) NOT NULL
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`tinyint_key`, `char_50_key`, `character_key`)
+ DISTRIBUTED BY HASH(`tinyint_key`) BUCKETS 1
+ PROPERTIES ( "replication_num" = "1" );
+ """
+
sql """INSERT INTO ${tableName} VALUES (1,1,1)"""
sql """INSERT INTO ${tableName} VALUES (2,2,2)"""
sql """INSERT INTO ${tableName} VALUES (3,3,3)"""
@@ -90,4 +103,9 @@ suite("test_segment_iterator_delete") {
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};"""
sql """drop table ${tableName} force"""
+
+ // delete ColumnDictionary
+ sql """INSERT INTO ${tableName_dict} VALUES(1, 'dddd', 'adgs'), (1, 'SSS', 'sk6S0'), (1, 'ttt', 'zdges');"""
+ sql """delete from ${tableName_dict} where character_key < "sk6S0";"""
+ qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName_dict} order by tinyint_key, char_50_key;"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]