This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.1 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 0562bab929f2d2cb2a4dbec44f8e89560867bbaa Author: ZenoYang <[email protected]> AuthorDate: Wed May 18 14:57:13 2022 +0800 [fix](storage) low_cardinality_optimize core dump when is null predicate (#9586) Issue Number: close #9555 Make the last value of the dictionary null; when ColumnDict inserts a null value, add the encoding corresponding to the last value of the dictionary. --- be/src/olap/comparison_predicate.cpp | 7 +++++-- be/src/vec/columns/column_dictionary.h | 30 ++++++++++++++---------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp index 45a89f92ad..363c75e5b8 100644 --- a/be/src/olap/comparison_predicate.cpp +++ b/be/src/olap/comparison_predicate.cpp @@ -147,6 +147,9 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) // todo(zeno) define interface in IColumn to simplify code +// If 1 OP 0 returns true, it means the predicate is > or >= +// If 1 OP 1 returns true, it means the predicate is >= or <= +// by this way, avoid redundant code #define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE) \ template <class T> \ void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \ @@ -164,7 +167,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \ auto& data_array = nested_col_ptr->get_data(); \ auto dict_code = \ - IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ + IS_RANGE ? 
nested_col_ptr->find_code_by_bound(_value, 1 OP 0, 1 OP 1) \ : nested_col_ptr->find_code(_value); \ for (uint16_t i = 0; i < *size; i++) { \ uint16_t idx = sel[i]; \ @@ -192,7 +195,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) auto& dict_col = \ reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(column);\ auto& data_array = dict_col.get_data(); \ - auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ + auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1) \ : dict_col.find_code(_value); \ for (uint16_t i = 0; i < *size; ++i) { \ uint16_t idx = sel[i]; \ diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 7d7117aee9..cc27ca1cdb 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -97,12 +97,10 @@ public: } void insert_data(const char* pos, size_t /*length*/) override { - _codes.push_back(unaligned_load<T>(pos)); + LOG(FATAL) << "insert_data not supported in ColumnDictionary"; } - void insert_data(const T value) { _codes.push_back(value); } - - void insert_default() override { _codes.push_back(T()); } + void insert_default() override { _codes.push_back(_dict.get_null_code()); } void clear() override { _codes.clear(); @@ -218,13 +216,12 @@ public: void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict_array, size_t data_num, uint32_t dict_num) override { - if (!is_dict_inited()) { + if (_dict.empty()) { _dict.reserve(dict_num); for (uint32_t i = 0; i < dict_num; ++i) { auto value = StringValue(dict_array[i].data, dict_array[i].size); _dict.insert_value(value); } - _dict_inited = true; } char* end_ptr = (char*)_codes.get_end_ptr(); @@ -266,8 +263,6 @@ public: return _dict.find_codes(values); } - bool is_dict_inited() const { return _dict_inited; } - bool is_dict_sorted() const { return _dict_sorted; } bool is_dict_code_converted() const { 
return _dict_code_converted; } @@ -304,13 +299,17 @@ public: if (it != _inverted_index.end()) { return it->second; } - return -1; + return -2; // -1 is null code + } + + T get_null_code() { return -1; } + + inline StringValue& get_value(T code) { + return code >= _dict_data.size() ? _null_value : _dict_data[code]; } - inline StringValue& get_value(T code) { return _dict_data[code]; } - inline void generate_hash_values() { - if (_hash_values.size() == 0) { + if (_hash_values.empty()) { _hash_values.resize(_dict_data.size()); for (size_t i = 0; i < _dict_data.size(); i++) { auto& sv = _dict_data[i]; @@ -387,7 +386,10 @@ public: size_t byte_size() { return _dict_data.size() * sizeof(_dict_data[0]); } + bool empty() { return _dict_data.empty(); } + private: + StringValue _null_value = StringValue(); StringValue::Comparator _comparator; // dict code -> dict value DictContainer _dict_data; @@ -405,16 +407,12 @@ public: private: size_t _reserve_size; - bool _dict_inited = false; bool _dict_sorted = false; bool _dict_code_converted = false; Dictionary _dict; Container _codes; }; -template class ColumnDictionary<uint8_t>; -template class ColumnDictionary<uint16_t>; -template class ColumnDictionary<uint32_t>; template class ColumnDictionary<int32_t>; using ColumnDictI32 = vectorized::ColumnDictionary<doris::vectorized::Int32>; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
