This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.1 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 28892f28075d20177ad55ca203d23b5f9ab240b5 Author: ZenoYang <[email protected]> AuthorDate: Thu Apr 7 11:29:26 2022 +0800 [fix](storage) Fix core bug of convert to predicate column (#8833) To reproduce: when `enable_low_cardinality_optimize = true`, running the following SQL query against the TPCH dataset causes a core dump: ```sql select count(*) from lineitem where l_comment = 'ously even exc'; ``` This query triggers the execution of `ColumnDictionary::convert_to_predicate_column_if_dictionary`, where `res->reserve(_codes.size())` is problematic: the current `_codes.size()` is smaller than the size that was reserved for `_codes`, so inserting a value into the `PredicateColumn` afterwards causes a core dump. --- be/src/olap/rowset/segment_v2/binary_dict_page.cpp | 2 +- be/src/vec/columns/column.h | 2 +- be/src/vec/columns/column_dictionary.h | 14 +++++++++----- be/src/vec/columns/column_nullable.h | 6 ++---- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index 28a6e62b36..95a9fd5c7b 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -240,7 +240,7 @@ void BinaryDictPageDecoder::set_dict_decoder(PageDecoder* dict_decoder, StringRe Status BinaryDictPageDecoder::next_batch(size_t* n, vectorized::MutableColumnPtr &dst) { if (_encoding_type == PLAIN_ENCODING) { - dst = (*(std::move(dst->convert_to_predicate_column_if_dictionary()))).assume_mutable(); + dst = dst->convert_to_predicate_column_if_dictionary(); return _data_page_decoder->next_batch(n, dst); } // dictionary encoding diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 7e717bb984..610babd91a 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -66,7 +66,7 @@ public: /// If column isn't ColumnDictionary, return itself. /// If column is ColumnDictionary, transforms is to predicate column. 
- virtual Ptr convert_to_predicate_column_if_dictionary() { return get_ptr(); } + virtual MutablePtr convert_to_predicate_column_if_dictionary() { return get_ptr(); } /// If column is ColumnDictionary, and is a range comparison predicate, convert dict encoding virtual void convert_dict_codes_if_necessary() {} diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 499a04abdf..76f9516c9c 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -120,7 +120,10 @@ public: LOG(FATAL) << "get_permutation not supported in ColumnDictionary"; } - void reserve(size_t n) override { _codes.reserve(n); } + void reserve(size_t n) override { + _reserve_size = n; + _codes.reserve(n); + } const char* get_family_name() const override { return "ColumnDictionary"; } @@ -259,15 +262,15 @@ public: bool is_dict_code_converted() const { return _dict_code_converted; } - ColumnPtr convert_to_predicate_column_if_dictionary() override { + MutableColumnPtr convert_to_predicate_column_if_dictionary() override { auto res = vectorized::PredicateColumnType<StringValue>::create(); - size_t size = _codes.size(); - res->reserve(size); - for (size_t i = 0; i < size; ++i) { + res->reserve(_reserve_size); + for (size_t i = 0; i < _codes.size(); ++i) { auto& code = reinterpret_cast<T&>(_codes[i]); auto value = _dict.get_value(code); res->insert_data(value.ptr, value.len); } + clear(); _dict.clear(); return res; } @@ -365,6 +368,7 @@ public: }; private: + size_t _reserve_size; bool _dict_inited = false; bool _dict_sorted = false; bool _dict_code_converted = false; diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index aa0df2dc1c..482b9a5bfe 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -267,10 +267,8 @@ public: LOG(FATAL) << "should not call the method in column nullable"; } - ColumnPtr convert_to_predicate_column_if_dictionary() 
override { - IColumn* nested_ptr = get_nested_column_ptr().get(); - nested_ptr = (*(std::move(nested_ptr->convert_to_predicate_column_if_dictionary() - ))).assume_mutable(); + MutableColumnPtr convert_to_predicate_column_if_dictionary() override { + nested_column = get_nested_column().convert_to_predicate_column_if_dictionary(); return get_ptr(); } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
