This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.1 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 3659b8bb90a97761d6d72e938113bdff86cc846e Author: ZenoYang <[email protected]> AuthorDate: Fri Apr 29 11:45:04 2022 +0800 [fix](vectorized) Query get wrong result when ColumnDict concurrent predicate eval (#9270) --- be/src/olap/column_predicate.h | 4 +- be/src/olap/comparison_predicate.cpp | 96 ++++------------------ be/src/olap/comparison_predicate.h | 3 - be/src/olap/in_list_predicate.cpp | 37 +-------- be/src/olap/in_list_predicate.h | 5 +- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 3 +- 6 files changed, 22 insertions(+), 126 deletions(-) diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 45f8fcfa7f..56f817d9f3 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -42,7 +42,7 @@ enum class PredicateType { GT = 5, GE = 6, IN_LIST = 7, - NO_IN_LIST = 8, + NOT_IN_LIST = 8, IS_NULL = 9, NOT_IS_NULL = 10, BF = 11, // BloomFilter @@ -86,8 +86,6 @@ public: virtual void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const {}; uint32_t column_id() const { return _column_id; } - virtual void set_dict_code_if_necessary(vectorized::IColumn& column) { } - protected: uint32_t _column_id; bool _opposite; diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp index 57549c349c..45a89f92ad 100644 --- a/be/src/olap/comparison_predicate.cpp +++ b/be/src/olap/comparison_predicate.cpp @@ -147,7 +147,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) // todo(zeno) define interface in IColumn to simplify code -#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP) \ +#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE) \ template <class T> \ void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \ uint16_t new_size = 0; \ @@ -163,11 +163,14 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) auto* nested_col_ptr = vectorized::check_and_get_column< \ vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \ auto& data_array = nested_col_ptr->get_data(); \ + auto dict_code = \ + IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ + : nested_col_ptr->find_code(_value); \ for (uint16_t i = 0; i < *size; i++) { \ uint16_t idx = sel[i]; \ sel[new_size] = idx; \ const auto& cell_value = data_array[idx]; \ - bool ret = !null_bitmap[idx] && (cell_value OP _dict_code); \ + bool ret = !null_bitmap[idx] && (cell_value OP dict_code); \ new_size += _opposite ? !ret : ret; \ } \ } \ @@ -184,20 +187,20 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) new_size += _opposite ? !ret : ret; \ } \ } \ - *size = new_size; \ } else if (column.is_column_dictionary()) { \ if constexpr (std::is_same_v<T, StringValue>) { \ auto& dict_col = \ reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(column);\ auto& data_array = dict_col.get_data(); \ + auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ + : dict_col.find_code(_value); \ for (uint16_t i = 0; i < *size; ++i) { \ uint16_t idx = sel[i]; \ sel[new_size] = idx; \ const auto& cell_value = data_array[idx]; \ - bool ret = cell_value OP _dict_code; \ + bool ret = cell_value OP dict_code; \ new_size += _opposite ? !ret : ret; \ } \ - *size = new_size; \ } \ } else { \ auto& pred_column_ref = \ @@ -210,17 +213,17 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) auto ret = cell_value OP _value; \ new_size += _opposite ? !ret : ret; \ } \ - *size = new_size; \ } \ + *size = new_size; \ } -COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==) -COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=) -COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <) -COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=) -COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >) -COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=) +COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==, false) +COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=, false) +COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <, true) +COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true) +COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true) +COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true) #define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \ template <class T> \ @@ -502,65 +505,6 @@ COMPARISON_PRED_BITMAP_EVALUATE(LessEqualPredicate, <=) COMPARISON_PRED_BITMAP_EVALUATE(GreaterPredicate, >) COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=) - -#define COMPARISON_PRED_SET_DICT_CODE(CLASS) \ - template <class T> \ - void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) { \ - if (_dict_code_inited) { \ - return; \ - } \ - if constexpr (std::is_same_v<T, StringValue>) { \ - auto* col_ptr = column.get_ptr().get(); \ - if (column.is_nullable()) { \ - auto nullable_col = \ - reinterpret_cast<vectorized::ColumnNullable*>(col_ptr); \ - col_ptr = nullable_col->get_nested_column_ptr().get(); \ - } \ - if (col_ptr->is_column_dictionary()) { \ - auto& dict_col = \ - reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \ - *col_ptr); \ - _dict_code = dict_col.find_code(_value); \ - _dict_code_inited = true; \ - } \ - } \ - } - -COMPARISON_PRED_SET_DICT_CODE(EqualPredicate) -COMPARISON_PRED_SET_DICT_CODE(NotEqualPredicate) - -// If 1 OP 0 returns true, it means the predicate is > or >= -// If 1 OP 1 returns true, it means the predicate is >= or <= -// by this way, avoid redundant code -#define RAMGE_COMPARISON_PRED_SET_DICT_CODE(CLASS, OP) \ - template <class T> \ - void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) { \ - if (_dict_code_inited) { \ - return; \ - } \ - if constexpr (std::is_same_v<T, StringValue>) { \ - auto* col_ptr = column.get_ptr().get(); \ - if (column.is_nullable()) { \ - auto nullable_col = \ - reinterpret_cast<vectorized::ColumnNullable*>(col_ptr); \ - col_ptr = nullable_col->get_nested_column_ptr().get(); \ - } \ - \ - if (col_ptr->is_column_dictionary()) { \ - auto& dict_col = \ - reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \ - *col_ptr); \ - _dict_code = dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1); \ - _dict_code_inited = true; \ - } \ - } \ - } - -RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessPredicate, <) -RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessEqualPredicate, <=) -RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterPredicate, >) -RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterEqualPredicate, >=) - #define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS) \ template CLASS<int8_t>::CLASS(uint32_t column_id, const int8_t& value, bool opposite); \ template CLASS<int16_t>::CLASS(uint32_t column_id, const int16_t& value, bool opposite); \ @@ -745,14 +689,4 @@ COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(LessEqualPredicate) COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterPredicate) COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterEqualPredicate) -#define COMPARISON_PRED_SET_DICT_CODE_DECLARATION(CLASS) \ -template void CLASS<StringValue>::set_dict_code_if_necessary(vectorized::IColumn& column); - -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(EqualPredicate) -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(NotEqualPredicate) -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessPredicate) -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessEqualPredicate) -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterPredicate) -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterEqualPredicate) - } //namespace doris diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index 3df31c370d..7e7f718a76 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -47,11 +47,8 @@ class VectorizedRowBatch; void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \ bool* flags) const override; \ void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const override; \ - void set_dict_code_if_necessary(vectorized::IColumn& column) override; \ private: \ T _value; \ - bool _dict_code_inited = false; \ - int32_t _dict_code; \ }; COMPARISON_PRED_CLASS_DEFINE(EqualPredicate, EQ) diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp index 0401673926..9b78a8705f 100644 --- a/be/src/olap/in_list_predicate.cpp +++ b/be/src/olap/in_list_predicate.cpp @@ -134,12 +134,13 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==) auto* nested_col_ptr = vectorized::check_and_get_column< \ vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \ auto& data_array = nested_col_ptr->get_data(); \ + auto dict_codes = nested_col_ptr->find_codes(_values); \ for (uint16_t i = 0; i < *size; i++) { \ uint16_t idx = sel[i]; \ sel[new_size] = idx; \ const auto& cell_value = data_array[idx]; \ bool ret = !null_bitmap[idx] \ - && (_dict_codes.find(cell_value) OP _dict_codes.end()); \ + && (dict_codes.find(cell_value) OP dict_codes.end()); \ new_size += _opposite ? !ret : ret; \ } \ } \ @@ -155,18 +156,18 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==) new_size += _opposite ? !ret : ret; \ } \ } \ - *size = new_size; \ } else if (column.is_column_dictionary()) { \ if constexpr (std::is_same_v<T, StringValue>) { \ auto& dict_col = \ reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \ column); \ auto& data_array = dict_col.get_data(); \ + auto dict_codes = dict_col.find_codes(_values); \ for (uint16_t i = 0; i < *size; i++) { \ uint16_t idx = sel[i]; \ sel[new_size] = idx; \ const auto& cell_value = data_array[idx]; \ - auto result = (_dict_codes.find(cell_value) OP _dict_codes.end()); \ + auto result = (dict_codes.find(cell_value) OP dict_codes.end()); \ new_size += _opposite ? !result : result; \ } \ } \ @@ -282,32 +283,6 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==) IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=) IN_LIST_PRED_BITMAP_EVALUATE(NotInListPredicate, -=) -#define IN_LIST_PRED_SET_DICT_CODE(CLASS) \ - template <class T> \ - void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) { \ - if (_dict_code_inited) { \ - return; \ - } \ - if constexpr (std::is_same_v<T, StringValue>) { \ - auto* col_ptr = column.get_ptr().get(); \ - if (column.is_nullable()) { \ - auto nullable_col = \ - reinterpret_cast<vectorized::ColumnNullable*>(col_ptr); \ - col_ptr = nullable_col->get_nested_column_ptr().get(); \ - } \ - if (col_ptr->is_column_dictionary()) { \ - auto& dict_col = \ - reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \ - *col_ptr); \ - _dict_codes = dict_col.find_codes(_values); \ - _dict_code_inited = true; \ - } \ - } \ - } - -IN_LIST_PRED_SET_DICT_CODE(InListPredicate) -IN_LIST_PRED_SET_DICT_CODE(NotInListPredicate) - #define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS) \ template CLASS<int8_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int8_t>&& values, \ bool opposite); \ @@ -415,8 +390,4 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate) IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(InListPredicate) IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(NotInListPredicate) -template void InListPredicate<StringValue>::set_dict_code_if_necessary(vectorized::IColumn& column); -template void NotInListPredicate<StringValue>::set_dict_code_if_necessary( - vectorized::IColumn& column); - } //namespace doris diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index 089ee84a06..33682b6824 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -96,15 +96,12 @@ class VectorizedRowBatch; void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; \ void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \ void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \ - void set_dict_code_if_necessary(vectorized::IColumn& column) override; \ private: \ phmap::flat_hash_set<T> _values; \ - bool _dict_code_inited = false; \ - phmap::flat_hash_set<int32_t> _dict_codes; \ }; IN_LIST_PRED_CLASS_DEFINE(InListPredicate, IN_LIST) -IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NO_IN_LIST) +IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NOT_IN_LIST) } //namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 52cc584fdb..9484601f12 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -634,7 +634,7 @@ void SegmentIterator::_vec_init_lazy_materialization() { if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_STRING || predicate->type() == PredicateType::BF || predicate->type() == PredicateType::IN_LIST || - predicate->type() == PredicateType::NO_IN_LIST) { + predicate->type() == PredicateType::NOT_IN_LIST) { short_cir_pred_col_id_set.insert(cid); _short_cir_eval_predicate.push_back(predicate); _is_all_column_basic_type = false; @@ -867,7 +867,6 @@ void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_ predicate->type() == PredicateType::GT || predicate->type() == PredicateType::GE) { col_ptr->convert_dict_codes_if_necessary(); } - predicate->set_dict_code_if_necessary(*short_cir_column); predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size_ptr); } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
