This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git.
commit 8fb28244d65afb87efec42016da4dbf7981b2d19 Author: TengJianPing <[email protected]> AuthorDate: Tue May 28 23:16:02 2024 +0800 [improvement](page builder) avoid allocating big memory in ctor (#35493) ## Proposed changes Issue Number: close #xxx <!--Describe your changes.--> ## Further comments If this is a relatively large or complex change, kick off the discussion at [[email protected]](mailto:[email protected]) by explaining why you chose the solution you did and what alternatives you considered, etc... --- be/src/olap/rowset/segment_v2/binary_dict_page.cpp | 30 ++++++++++++++------ be/src/olap/rowset/segment_v2/binary_dict_page.h | 11 ++++++-- be/src/olap/rowset/segment_v2/binary_plain_page.h | 16 +++++++---- be/src/olap/rowset/segment_v2/binary_prefix_page.h | 12 ++++++-- be/src/olap/rowset/segment_v2/bitshuffle_page.h | 16 +++++++---- be/src/olap/rowset/segment_v2/column_writer.cpp | 2 +- be/src/olap/rowset/segment_v2/encoding_info.cpp | 32 ++++++++-------------- .../rowset/segment_v2/frame_of_reference_page.h | 15 +++++++--- .../rowset/segment_v2/indexed_column_writer.cpp | 2 +- be/src/olap/rowset/segment_v2/page_builder.h | 18 +++++++++++- be/src/olap/rowset/segment_v2/plain_page.h | 14 +++++++--- be/src/olap/rowset/segment_v2/rle_page.h | 20 ++++++++++---- .../rowset/segment_v2/binary_dict_page_test.cpp | 4 +++ .../rowset/segment_v2/binary_plain_page_test.cpp | 2 ++ .../rowset/segment_v2/binary_prefix_page_test.cpp | 6 ++++ .../rowset/segment_v2/bitshuffle_page_test.cpp | 4 +++ .../segment_v2/frame_of_reference_page_test.cpp | 8 ++++++ be/test/olap/rowset/segment_v2/plain_page_test.cpp | 4 +++ be/test/olap/rowset/segment_v2/rle_page_test.cpp | 6 ++++ 19 files changed, 159 insertions(+), 63 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index ff61f1a392d..52795f0338a 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ 
b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -45,17 +45,26 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options) _finished(false), _data_page_builder(nullptr), _dict_builder(nullptr), - _encoding_type(DICT_ENCODING) { + _encoding_type(DICT_ENCODING) {} + +Status BinaryDictPageBuilder::init() { // initially use DICT_ENCODING // TODO: the data page builder type can be created by Factory according to user config - _data_page_builder.reset(new BitshufflePageBuilder<FieldType::OLAP_FIELD_TYPE_INT>(options)); + PageBuilder* data_page_builder_ptr = nullptr; + RETURN_IF_ERROR(BitshufflePageBuilder<FieldType::OLAP_FIELD_TYPE_INT>::create( + &data_page_builder_ptr, _options)); + _data_page_builder.reset(data_page_builder_ptr); PageBuilderOptions dict_builder_options; dict_builder_options.data_page_size = std::min(_options.data_page_size, _options.dict_page_size); dict_builder_options.is_dict_page = true; - _dict_builder.reset( - new BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options)); - reset(); + + PageBuilder* dict_builder_ptr = nullptr; + RETURN_IF_ERROR(BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>::create( + &dict_builder_ptr, dict_builder_options)); + _dict_builder.reset(static_cast<BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>*>( + dict_builder_ptr)); + return reset(); } bool BinaryDictPageBuilder::is_page_full() { @@ -148,18 +157,21 @@ OwnedSlice BinaryDictPageBuilder::finish() { return _buffer.build(); } -void BinaryDictPageBuilder::reset() { +Status BinaryDictPageBuilder::reset() { _finished = false; _buffer.reserve(_options.data_page_size + BINARY_DICT_PAGE_HEADER_SIZE); _buffer.resize(BINARY_DICT_PAGE_HEADER_SIZE); if (_encoding_type == DICT_ENCODING && _dict_builder->is_page_full()) { - _data_page_builder.reset( - new BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(_options)); + PageBuilder* data_page_builder_ptr = nullptr; + 
RETURN_IF_ERROR(BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>::create( + &data_page_builder_ptr, _options)); + _data_page_builder.reset(data_page_builder_ptr); _encoding_type = PLAIN_ENCODING; } else { - _data_page_builder->reset(); + RETURN_IF_ERROR(_data_page_builder->reset()); } + return Status::OK(); } size_t BinaryDictPageBuilder::count() const { diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.h b/be/src/olap/rowset/segment_v2/binary_dict_page.h index 1592a927e5f..2a8467e7def 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.h +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.h @@ -57,9 +57,12 @@ enum { BINARY_DICT_PAGE_HEADER_SIZE = 4 }; // Data pages start with mode_ = DICT_ENCODING, when the size of dictionary // page go beyond the option_->dict_page_size, the subsequent data pages will switch // to string plain page automatically. -class BinaryDictPageBuilder : public PageBuilder { +class BinaryDictPageBuilder : public PageBuilderHelper<BinaryDictPageBuilder> { public: - BinaryDictPageBuilder(const PageBuilderOptions& options); + using Self = BinaryDictPageBuilder; + friend class PageBuilderHelper<Self>; + + Status init() override; bool is_page_full() override; @@ -67,7 +70,7 @@ public: OwnedSlice finish() override; - void reset() override; + Status reset() override; size_t count() const override; @@ -80,6 +83,8 @@ public: Status get_last_value(void* value) const override; private: + BinaryDictPageBuilder(const PageBuilderOptions& options); + PageBuilderOptions _options; bool _finished; diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h b/be/src/olap/rowset/segment_v2/binary_plain_page.h index 7bc5e020a83..3fe76c5d3ae 100644 --- a/be/src/olap/rowset/segment_v2/binary_plain_page.h +++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h @@ -44,12 +44,12 @@ namespace doris { namespace segment_v2 { template <FieldType Type> -class BinaryPlainPageBuilder : public PageBuilder { +class BinaryPlainPageBuilder : 
public PageBuilderHelper<BinaryPlainPageBuilder<Type>> { public: - BinaryPlainPageBuilder(const PageBuilderOptions& options) - : _size_estimate(0), _options(options) { - reset(); - } + using Self = BinaryPlainPageBuilder<Type>; + friend class PageBuilderHelper<Self>; + + Status init() override { return reset(); } bool is_page_full() override { bool ret = false; @@ -108,7 +108,7 @@ public: return _buffer.build(); } - void reset() override { + Status reset() override { _offsets.clear(); _buffer.clear(); _buffer.reserve(_options.data_page_size == 0 @@ -117,6 +117,7 @@ public: _size_estimate = sizeof(uint32_t); _finished = false; _last_value_size = 0; + return Status::OK(); } size_t count() const override { return _offsets.size(); } @@ -151,6 +152,9 @@ public: inline Slice get(std::size_t idx) const { return (*this)[idx]; } private: + BinaryPlainPageBuilder(const PageBuilderOptions& options) + : _size_estimate(0), _options(options) {} + void _copy_value_at(size_t idx, faststring* value) const { size_t value_size = (idx < _offsets.size() - 1) ? 
_offsets[idx + 1] - _offsets[idx] : _last_value_size; diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.h b/be/src/olap/rowset/segment_v2/binary_prefix_page.h index 26bdb1518d5..de4ec60070b 100644 --- a/be/src/olap/rowset/segment_v2/binary_prefix_page.h +++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.h @@ -41,9 +41,12 @@ namespace segment_v2 { // Entry := SharedPrefixLength(vint), UnsharedLength(vint), Byte^UnsharedLength // Trailer := NumEntry(uint32_t), RESTART_POINT_INTERVAL(uint8_t) // RestartPointStartOffset(uint32_t)^NumRestartPoints,NumRestartPoints(uint32_t) -class BinaryPrefixPageBuilder : public PageBuilder { +class BinaryPrefixPageBuilder : public PageBuilderHelper<BinaryPrefixPageBuilder> { public: - BinaryPrefixPageBuilder(const PageBuilderOptions& options) : _options(options) { reset(); } + using Self = BinaryPrefixPageBuilder; + friend class PageBuilderHelper<Self>; + + Status init() override { return reset(); } bool is_page_full() override { return size() >= _options.data_page_size; } @@ -51,12 +54,13 @@ public: OwnedSlice finish() override; - void reset() override { + Status reset() override { _restart_points_offset.clear(); _last_entry.clear(); _count = 0; _buffer.clear(); _finished = false; + return Status::OK(); } uint64_t size() const override { @@ -88,6 +92,8 @@ public: } private: + BinaryPrefixPageBuilder(const PageBuilderOptions& options) : _options(options) {} + PageBuilderOptions _options; std::vector<uint32_t> _restart_points_offset; faststring _first_entry; diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index 0e0f5132294..004e53f3e5c 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -84,12 +84,12 @@ void warn_with_bitshuffle_error(int64_t val); // The header is followed by the bitshuffle-compressed element data. 
// template <FieldType Type> -class BitshufflePageBuilder : public PageBuilder { +class BitshufflePageBuilder : public PageBuilderHelper<BitshufflePageBuilder<Type>> { public: - BitshufflePageBuilder(const PageBuilderOptions& options) - : _options(options), _count(0), _remain_element_capacity(0), _finished(false) { - reset(); - } + using Self = BitshufflePageBuilder<Type>; + friend class PageBuilderHelper<Self>; + + Status init() override { return reset(); } bool is_page_full() override { return _remain_element_capacity == 0; } @@ -149,7 +149,7 @@ public: return _finish(SIZE_OF_TYPE); } - void reset() override { + Status reset() override { auto block_size = _options.data_page_size; _count = 0; _data.clear(); @@ -160,6 +160,7 @@ public: _buffer.resize(BITSHUFFLE_PAGE_HEADER_SIZE); _finished = false; _remain_element_capacity = block_size / SIZE_OF_TYPE; + return Status::OK(); } size_t count() const override { return _count; } @@ -184,6 +185,9 @@ public: } private: + BitshufflePageBuilder(const PageBuilderOptions& options) + : _options(options), _count(0), _remain_element_capacity(0), _finished(false) {} + OwnedSlice _finish(int final_size_of_type) { _data.resize(final_size_of_type * _count); diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 08a94091229..cbda176acd6 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -718,7 +718,7 @@ Status ScalarColumnWriter::finish_current_page() { // build data page body : encoded values + [nullmap] std::vector<Slice> body; OwnedSlice encoded_values = _page_builder->finish(); - _page_builder->reset(); + RETURN_IF_ERROR(_page_builder->reset()); body.push_back(encoded_values.slice()); OwnedSlice nullmap; diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp index f10aba5cd3b..717304637dd 100644 --- a/be/src/olap/rowset/segment_v2/encoding_info.cpp 
+++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp @@ -51,8 +51,7 @@ struct TypeEncodingTraits {}; template <FieldType type, typename CppType> struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new PlainPageBuilder<type>(opts); - return Status::OK(); + return PlainPageBuilder<type>::create(builder, opts); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { @@ -64,8 +63,7 @@ struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> { template <FieldType type> struct TypeEncodingTraits<type, PLAIN_ENCODING, Slice> { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new BinaryPlainPageBuilder<type>(opts); - return Status::OK(); + return BinaryPlainPageBuilder<type>::create(builder, opts); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { @@ -78,8 +76,7 @@ template <FieldType type, typename CppType> struct TypeEncodingTraits<type, BIT_SHUFFLE, CppType, typename std::enable_if<!std::is_same<CppType, Slice>::value>::type> { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new BitshufflePageBuilder<type>(opts); - return Status::OK(); + return BitshufflePageBuilder<type>::create(builder, opts); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { @@ -91,8 +88,7 @@ struct TypeEncodingTraits<type, BIT_SHUFFLE, CppType, template <> struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_BOOL, RLE, bool> { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL>(opts); - return Status::OK(); + return RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL>::create(builder, opts); } static Status 
create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { @@ -104,8 +100,7 @@ struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_BOOL, RLE, bool> { template <FieldType type> struct TypeEncodingTraits<type, DICT_ENCODING, Slice> { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new BinaryDictPageBuilder(opts); - return Status::OK(); + return BinaryDictPageBuilder::create(builder, opts); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { @@ -118,8 +113,7 @@ template <> struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATE, FOR_ENCODING, typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::CppType> { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATE>(opts); - return Status::OK(); + return FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATE>::create(builder, opts); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { @@ -132,8 +126,8 @@ template <> struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATEV2, FOR_ENCODING, typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATEV2>::CppType> { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATEV2>(opts); - return Status::OK(); + return FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATEV2>::create(builder, + opts); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { @@ -146,8 +140,8 @@ template <> struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2, FOR_ENCODING, typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>::CppType> { static Status create_page_builder(const 
PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>(opts); - return Status::OK(); + return FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>::create(builder, + opts); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { @@ -161,8 +155,7 @@ template <FieldType type, typename CppType> struct TypeEncodingTraits<type, FOR_ENCODING, CppType, typename std::enable_if<std::is_integral<CppType>::value>::type> { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new FrameOfReferencePageBuilder<type>(opts); - return Status::OK(); + return FrameOfReferencePageBuilder<type>::create(builder, opts); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { @@ -174,8 +167,7 @@ struct TypeEncodingTraits<type, FOR_ENCODING, CppType, template <FieldType type> struct TypeEncodingTraits<type, PREFIX_ENCODING, Slice> { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { - *builder = new BinaryPrefixPageBuilder(opts); - return Status::OK(); + return BinaryPrefixPageBuilder::create(builder, opts); } static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) { diff --git a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h index 64e57afa988..4477912803b 100644 --- a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h +++ b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h @@ -27,11 +27,14 @@ namespace segment_v2 { // Encode page use frame-of-reference coding template <FieldType Type> -class FrameOfReferencePageBuilder : public PageBuilder { +class FrameOfReferencePageBuilder : public PageBuilderHelper<FrameOfReferencePageBuilder<Type>> { public: - explicit 
FrameOfReferencePageBuilder(const PageBuilderOptions& options) - : _options(options), _count(0), _finished(false) { + using Self = FrameOfReferencePageBuilder<Type>; + friend class PageBuilderHelper<Self>; + + Status init() override { _encoder.reset(new ForEncoder<CppType>(&_buf)); + return Status::OK(); } bool is_page_full() override { return _encoder->len() >= _options.data_page_size; } @@ -58,10 +61,11 @@ public: return _buf.build(); } - void reset() override { + Status reset() override { _count = 0; _finished = false; _encoder->clear(); + return Status::OK(); } size_t count() const override { return _count; } @@ -85,6 +89,9 @@ public: } private: + explicit FrameOfReferencePageBuilder(const PageBuilderOptions& options) + : _options(options), _count(0), _finished(false) {} + typedef typename TypeTraits<Type>::CppType CppType; PageBuilderOptions _options; size_t _count; diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp index 003670796c2..e1b238084a9 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp @@ -118,7 +118,7 @@ Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) { // IndexedColumn doesn't have NULLs, thus data page body only contains encoded values OwnedSlice page_body = _data_page_builder->finish(); - _data_page_builder->reset(); + RETURN_IF_ERROR(_data_page_builder->reset()); PageFooterPB footer; footer.set_type(DATA_PAGE); diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h index 5df2ce949b9..61fa2eaf8e1 100644 --- a/be/src/olap/rowset/segment_v2/page_builder.h +++ b/be/src/olap/rowset/segment_v2/page_builder.h @@ -19,6 +19,7 @@ #include <stdint.h> +#include <memory> #include <vector> #include "common/status.h" @@ -42,6 +43,9 @@ public: virtual ~PageBuilder() {} + // Init the internal state of the page builder. 
+ virtual Status init() = 0; + // Used by column writer to determine whether the current page is full. // Column writer depends on the result to decide whether to flush current page. virtual bool is_page_full() = 0; @@ -69,7 +73,7 @@ public: // Reset the internal state of the page builder. // // Any data previously returned by finish may be invalidated by this call. - virtual void reset() = 0; + virtual Status reset() = 0; // Return the number of entries that have been added to the page. virtual size_t count() const = 0; @@ -91,5 +95,17 @@ private: DISALLOW_COPY_AND_ASSIGN(PageBuilder); }; +template <typename Derived> +class PageBuilderHelper : public PageBuilder { +public: + template <typename... Args> + static Status create(PageBuilder** builder, Args&&... args) { + std::unique_ptr<PageBuilder> builder_uniq_ptr(new Derived(std::forward<Args>(args)...)); + RETURN_IF_ERROR(builder_uniq_ptr->init()); + *builder = builder_uniq_ptr.release(); + return Status::OK(); + } +}; + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/plain_page.h b/be/src/olap/rowset/segment_v2/plain_page.h index cb9236ee315..cbcc96f31ba 100644 --- a/be/src/olap/rowset/segment_v2/plain_page.h +++ b/be/src/olap/rowset/segment_v2/plain_page.h @@ -31,13 +31,16 @@ namespace segment_v2 { static const size_t PLAIN_PAGE_HEADER_SIZE = sizeof(uint32_t); template <FieldType Type> -class PlainPageBuilder : public PageBuilder { +class PlainPageBuilder : public PageBuilderHelper<PlainPageBuilder<Type> > { public: - PlainPageBuilder(const PageBuilderOptions& options) : _options(options) { + using Self = PlainPageBuilder<Type>; + friend class PageBuilderHelper<Self>; + + Status init() override { // Reserve enough space for the page, plus a bit of slop since // we often overrun the page by a few values. 
_buffer.reserve(_options.data_page_size + 1024); - reset(); + return reset(); } bool is_page_full() override { return _buffer.size() > _options.data_page_size; } @@ -66,11 +69,12 @@ public: return _buffer.build(); } - void reset() override { + Status reset() override { _buffer.reserve(_options.data_page_size + 1024); _count = 0; _buffer.clear(); _buffer.resize(PLAIN_PAGE_HEADER_SIZE); + return Status::OK(); } size_t count() const override { return _count; } @@ -94,6 +98,8 @@ public: } private: + PlainPageBuilder(const PageBuilderOptions& options) : _options(options) {} + faststring _buffer; PageBuilderOptions _options; size_t _count; diff --git a/be/src/olap/rowset/segment_v2/rle_page.h b/be/src/olap/rowset/segment_v2/rle_page.h index bdc94a7080a..40ec587743c 100644 --- a/be/src/olap/rowset/segment_v2/rle_page.h +++ b/be/src/olap/rowset/segment_v2/rle_page.h @@ -51,10 +51,12 @@ enum { RLE_PAGE_HEADER_SIZE = 4 }; // // TODO(hkp): optimize rle algorithm template <FieldType Type> -class RlePageBuilder : public PageBuilder { +class RlePageBuilder : public PageBuilderHelper<RlePageBuilder<Type> > { public: - RlePageBuilder(const PageBuilderOptions& options) - : _options(options), _count(0), _finished(false), _bit_width(0), _rle_encoder(nullptr) { + using Self = RlePageBuilder<Type>; + friend class PageBuilderHelper<Self>; + + Status init() override { switch (Type) { case FieldType::OLAP_FIELD_TYPE_BOOL: { _bit_width = 1; @@ -66,7 +68,7 @@ public: } } _rle_encoder = new RleEncoder<CppType>(&_buf, _bit_width); - reset(); + return reset(); } ~RlePageBuilder() { delete _rle_encoder; } @@ -102,11 +104,12 @@ public: return _buf.build(); } - void reset() override { + Status reset() override { _count = 0; _finished = false; _rle_encoder->Clear(); _rle_encoder->Reserve(RLE_PAGE_HEADER_SIZE, 0); + return Status::OK(); } size_t count() const override { return _count; } @@ -132,6 +135,13 @@ public: } private: + RlePageBuilder(const PageBuilderOptions& options) + : 
_options(options), + _count(0), + _finished(false), + _bit_width(0), + _rle_encoder(nullptr) {} + typedef typename TypeTraits<Type>::CppType CppType; enum { SIZE_OF_TYPE = TypeTraits<Type>::size }; diff --git a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp index 87097fe1aa1..e1b40508485 100644 --- a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp @@ -43,6 +43,8 @@ public: options.data_page_size = 256 * 1024; options.dict_page_size = 256 * 1024; BinaryDictPageBuilder page_builder(options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); size_t count = slices.size(); const Slice* ptr = &slices[0]; @@ -132,6 +134,8 @@ public: options.data_page_size = 1 * 1024 * 1024; options.dict_page_size = 1 * 1024 * 1024; BinaryDictPageBuilder page_builder(options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); size_t count = contents.size(); std::vector<OwnedSlice> results; std::vector<size_t> page_start_ids; diff --git a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp index 45d824c731e..7a7cf7aa23c 100644 --- a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp @@ -47,6 +47,8 @@ public: PageBuilderOptions options; options.data_page_size = 256 * 1024; PageBuilderType page_builder(options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); size_t count = slices.size(); Slice* ptr = &slices[0]; diff --git a/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp index e6aefd49e16..e87c15fff09 100644 --- a/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp @@ -48,6 +48,8 @@ public: // encode PageBuilderOptions options; 
BinaryPrefixPageBuilder page_builder(options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); size_t count = slices.size(); const Slice* ptr = &slices[0]; @@ -162,6 +164,8 @@ public: // encode PageBuilderOptions options; BinaryPrefixPageBuilder page_builder(options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); size_t count = slices.size(); const Slice* ptr = &slices[0]; @@ -273,6 +277,8 @@ public: // encode PageBuilderOptions options; BinaryPrefixPageBuilder page_builder(options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); size_t count = slices.size(); const Slice* ptr = &slices[0]; diff --git a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp index 75d09363ee2..2165e9d19f0 100644 --- a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp @@ -56,6 +56,8 @@ public: PageBuilderOptions options; options.data_page_size = 256 * 1024; PageBuilderType page_builder(options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); page_builder.add(reinterpret_cast<const uint8_t*>(src), &size); OwnedSlice s = page_builder.finish(); @@ -121,6 +123,8 @@ public: PageBuilderOptions options; options.data_page_size = 256 * 1024; PageBuilderType page_builder(options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); page_builder.add(reinterpret_cast<const uint8_t*>(src), &size); OwnedSlice s = page_builder.finish(); diff --git a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp index 500d638aa69..f2bbd87a9b7 100644 --- a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp @@ -50,6 +50,8 @@ public: PageBuilderOptions builder_options; builder_options.data_page_size = 256 * 1024; PageBuilderType 
for_page_builder(builder_options); + Status ret0 = for_page_builder.init(); + EXPECT_TRUE(ret0.ok()); for_page_builder.add(reinterpret_cast<const uint8_t*>(src), &size); OwnedSlice s = for_page_builder.finish(); EXPECT_EQ(size, for_page_builder.count()); @@ -234,6 +236,8 @@ TEST_F(FrameOfReferencePageTest, TestInt32SequenceBlockEncoderSize) { builder_options.data_page_size = 256 * 1024; segment_v2::FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> page_builder( builder_options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size); OwnedSlice s = page_builder.finish(); // body: 4 bytes min value + 128 * 1 /8 packing value = 20 @@ -251,6 +255,8 @@ TEST_F(FrameOfReferencePageTest, TestFirstLastValue) { builder_options.data_page_size = 256 * 1024; segment_v2::FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> page_builder( builder_options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size); OwnedSlice s = page_builder.finish(); int32_t first_value = -1; @@ -271,6 +277,8 @@ TEST_F(FrameOfReferencePageTest, TestInt32NormalBlockEncoderSize) { builder_options.data_page_size = 256 * 1024; segment_v2::FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> page_builder( builder_options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size); OwnedSlice s = page_builder.finish(); // body: 4 bytes min value + 128 * 7 /8 packing value = 116 diff --git a/be/test/olap/rowset/segment_v2/plain_page_test.cpp b/be/test/olap/rowset/segment_v2/plain_page_test.cpp index bd18215522c..cd1e3163416 100644 --- a/be/test/olap/rowset/segment_v2/plain_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/plain_page_test.cpp @@ -64,6 +64,8 @@ public: PageBuilderOptions options; options.data_page_size = 256 * 1024; PageBuilderType 
page_builder(options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); page_builder.add(reinterpret_cast<const uint8_t*>(src), &size); OwnedSlice s = page_builder.finish(); @@ -120,6 +122,8 @@ public: PageBuilderOptions options; options.data_page_size = 256 * 1024; PageBuilderType page_builder(options); + Status ret0 = page_builder.init(); + EXPECT_TRUE(ret0.ok()); page_builder.add(reinterpret_cast<const uint8_t*>(src), &size); OwnedSlice s = page_builder.finish(); diff --git a/be/test/olap/rowset/segment_v2/rle_page_test.cpp b/be/test/olap/rowset/segment_v2/rle_page_test.cpp index ae7996f4a43..c97d70f9ac6 100644 --- a/be/test/olap/rowset/segment_v2/rle_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/rle_page_test.cpp @@ -54,6 +54,8 @@ public: PageBuilderOptions builder_options; builder_options.data_page_size = 256 * 1024; PageBuilderType rle_page_builder(builder_options); + Status ret0 = rle_page_builder.init(); + EXPECT_TRUE(ret0.ok()); rle_page_builder.add(reinterpret_cast<const uint8_t*>(src), &size); OwnedSlice s = rle_page_builder.finish(); EXPECT_EQ(size, rle_page_builder.count()); @@ -155,6 +157,8 @@ TEST_F(RlePageTest, TestRleInt32BlockEncoderSize) { PageBuilderOptions builder_options; builder_options.data_page_size = 256 * 1024; segment_v2::RlePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> rle_page_builder(builder_options); + Status ret0 = rle_page_builder.init(); + EXPECT_TRUE(ret0.ok()); rle_page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size); OwnedSlice s = rle_page_builder.finish(); // 4 bytes header @@ -191,6 +195,8 @@ TEST_F(RlePageTest, TestRleBoolBlockEncoderSize) { PageBuilderOptions builder_options; builder_options.data_page_size = 256 * 1024; segment_v2::RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL> rle_page_builder(builder_options); + Status ret0 = rle_page_builder.init(); + EXPECT_TRUE(ret0.ok()); rle_page_builder.add(reinterpret_cast<const uint8_t*>(bools.get()), &size); OwnedSlice s = 
rle_page_builder.finish(); // 4 bytes header --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
