This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8fb28244d65afb87efec42016da4dbf7981b2d19
Author: TengJianPing <[email protected]>
AuthorDate: Tue May 28 23:16:02 2024 +0800

    [improvement](page builder) avoid allocating big memory in ctor (#35493)
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
    
    ## Further comments
    
    If this is a relatively large or complex change, kick off the discussion
    at [dev@doris.apache.org](mailto:dev@doris.apache.org) by explaining why
    you chose the solution you did and what alternatives you considered,
    etc...
---
 be/src/olap/rowset/segment_v2/binary_dict_page.cpp | 30 ++++++++++++++------
 be/src/olap/rowset/segment_v2/binary_dict_page.h   | 11 ++++++--
 be/src/olap/rowset/segment_v2/binary_plain_page.h  | 16 +++++++----
 be/src/olap/rowset/segment_v2/binary_prefix_page.h | 12 ++++++--
 be/src/olap/rowset/segment_v2/bitshuffle_page.h    | 16 +++++++----
 be/src/olap/rowset/segment_v2/column_writer.cpp    |  2 +-
 be/src/olap/rowset/segment_v2/encoding_info.cpp    | 32 ++++++++--------------
 .../rowset/segment_v2/frame_of_reference_page.h    | 15 +++++++---
 .../rowset/segment_v2/indexed_column_writer.cpp    |  2 +-
 be/src/olap/rowset/segment_v2/page_builder.h       | 18 +++++++++++-
 be/src/olap/rowset/segment_v2/plain_page.h         | 14 +++++++---
 be/src/olap/rowset/segment_v2/rle_page.h           | 20 ++++++++++----
 .../rowset/segment_v2/binary_dict_page_test.cpp    |  4 +++
 .../rowset/segment_v2/binary_plain_page_test.cpp   |  2 ++
 .../rowset/segment_v2/binary_prefix_page_test.cpp  |  6 ++++
 .../rowset/segment_v2/bitshuffle_page_test.cpp     |  4 +++
 .../segment_v2/frame_of_reference_page_test.cpp    |  8 ++++++
 be/test/olap/rowset/segment_v2/plain_page_test.cpp |  4 +++
 be/test/olap/rowset/segment_v2/rle_page_test.cpp   |  6 ++++
 19 files changed, 159 insertions(+), 63 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
index ff61f1a392d..52795f0338a 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
@@ -45,17 +45,26 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options)
           _finished(false),
           _data_page_builder(nullptr),
           _dict_builder(nullptr),
-          _encoding_type(DICT_ENCODING) {
+          _encoding_type(DICT_ENCODING) {}
+
+Status BinaryDictPageBuilder::init() {
     // initially use DICT_ENCODING
     // TODO: the data page builder type can be created by Factory according to user config
-    _data_page_builder.reset(new BitshufflePageBuilder<FieldType::OLAP_FIELD_TYPE_INT>(options));
+    PageBuilder* data_page_builder_ptr = nullptr;
+    RETURN_IF_ERROR(BitshufflePageBuilder<FieldType::OLAP_FIELD_TYPE_INT>::create(
+            &data_page_builder_ptr, _options));
+    _data_page_builder.reset(data_page_builder_ptr);
     PageBuilderOptions dict_builder_options;
     dict_builder_options.data_page_size =
             std::min(_options.data_page_size, _options.dict_page_size);
     dict_builder_options.is_dict_page = true;
-    _dict_builder.reset(
-            new BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options));
-    reset();
+
+    PageBuilder* dict_builder_ptr = nullptr;
+    RETURN_IF_ERROR(BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>::create(
+            &dict_builder_ptr, dict_builder_options));
+    _dict_builder.reset(static_cast<BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>*>(
+            dict_builder_ptr));
+    return reset();
 }
 
 bool BinaryDictPageBuilder::is_page_full() {
@@ -148,18 +157,21 @@ OwnedSlice BinaryDictPageBuilder::finish() {
     return _buffer.build();
 }
 
-void BinaryDictPageBuilder::reset() {
+Status BinaryDictPageBuilder::reset() {
     _finished = false;
     _buffer.reserve(_options.data_page_size + BINARY_DICT_PAGE_HEADER_SIZE);
     _buffer.resize(BINARY_DICT_PAGE_HEADER_SIZE);
 
     if (_encoding_type == DICT_ENCODING && _dict_builder->is_page_full()) {
-        _data_page_builder.reset(
-                new BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>(_options));
+        PageBuilder* data_page_builder_ptr = nullptr;
+        RETURN_IF_ERROR(BinaryPlainPageBuilder<FieldType::OLAP_FIELD_TYPE_VARCHAR>::create(
+                &data_page_builder_ptr, _options));
+        _data_page_builder.reset(data_page_builder_ptr);
         _encoding_type = PLAIN_ENCODING;
     } else {
-        _data_page_builder->reset();
+        RETURN_IF_ERROR(_data_page_builder->reset());
     }
+    return Status::OK();
 }
 
 size_t BinaryDictPageBuilder::count() const {
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.h b/be/src/olap/rowset/segment_v2/binary_dict_page.h
index 1592a927e5f..2a8467e7def 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.h
@@ -57,9 +57,12 @@ enum { BINARY_DICT_PAGE_HEADER_SIZE = 4 };
 // Data pages start with mode_ = DICT_ENCODING, when the size of dictionary
 // page go beyond the option_->dict_page_size, the subsequent data pages will 
switch
 // to string plain page automatically.
-class BinaryDictPageBuilder : public PageBuilder {
+class BinaryDictPageBuilder : public PageBuilderHelper<BinaryDictPageBuilder> {
 public:
-    BinaryDictPageBuilder(const PageBuilderOptions& options);
+    using Self = BinaryDictPageBuilder;
+    friend class PageBuilderHelper<Self>;
+
+    Status init() override;
 
     bool is_page_full() override;
 
@@ -67,7 +70,7 @@ public:
 
     OwnedSlice finish() override;
 
-    void reset() override;
+    Status reset() override;
 
     size_t count() const override;
 
@@ -80,6 +83,8 @@ public:
     Status get_last_value(void* value) const override;
 
 private:
+    BinaryDictPageBuilder(const PageBuilderOptions& options);
+
     PageBuilderOptions _options;
     bool _finished;
 
diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h b/be/src/olap/rowset/segment_v2/binary_plain_page.h
index 7bc5e020a83..3fe76c5d3ae 100644
--- a/be/src/olap/rowset/segment_v2/binary_plain_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h
@@ -44,12 +44,12 @@ namespace doris {
 namespace segment_v2 {
 
 template <FieldType Type>
-class BinaryPlainPageBuilder : public PageBuilder {
+class BinaryPlainPageBuilder : public 
PageBuilderHelper<BinaryPlainPageBuilder<Type>> {
 public:
-    BinaryPlainPageBuilder(const PageBuilderOptions& options)
-            : _size_estimate(0), _options(options) {
-        reset();
-    }
+    using Self = BinaryPlainPageBuilder<Type>;
+    friend class PageBuilderHelper<Self>;
+
+    Status init() override { return reset(); }
 
     bool is_page_full() override {
         bool ret = false;
@@ -108,7 +108,7 @@ public:
         return _buffer.build();
     }
 
-    void reset() override {
+    Status reset() override {
         _offsets.clear();
         _buffer.clear();
         _buffer.reserve(_options.data_page_size == 0
@@ -117,6 +117,7 @@ public:
         _size_estimate = sizeof(uint32_t);
         _finished = false;
         _last_value_size = 0;
+        return Status::OK();
     }
 
     size_t count() const override { return _offsets.size(); }
@@ -151,6 +152,9 @@ public:
     inline Slice get(std::size_t idx) const { return (*this)[idx]; }
 
 private:
+    BinaryPlainPageBuilder(const PageBuilderOptions& options)
+            : _size_estimate(0), _options(options) {}
+
     void _copy_value_at(size_t idx, faststring* value) const {
         size_t value_size =
                 (idx < _offsets.size() - 1) ? _offsets[idx + 1] - 
_offsets[idx] : _last_value_size;
diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.h b/be/src/olap/rowset/segment_v2/binary_prefix_page.h
index 26bdb1518d5..de4ec60070b 100644
--- a/be/src/olap/rowset/segment_v2/binary_prefix_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.h
@@ -41,9 +41,12 @@ namespace segment_v2 {
 // Entry := SharedPrefixLength(vint), UnsharedLength(vint), Byte^UnsharedLength
 // Trailer := NumEntry(uint32_t), RESTART_POINT_INTERVAL(uint8_t)
 //            
RestartPointStartOffset(uint32_t)^NumRestartPoints,NumRestartPoints(uint32_t)
-class BinaryPrefixPageBuilder : public PageBuilder {
+class BinaryPrefixPageBuilder : public 
PageBuilderHelper<BinaryPrefixPageBuilder> {
 public:
-    BinaryPrefixPageBuilder(const PageBuilderOptions& options) : 
_options(options) { reset(); }
+    using Self = BinaryPrefixPageBuilder;
+    friend class PageBuilderHelper<Self>;
+
+    Status init() override { return reset(); }
 
     bool is_page_full() override { return size() >= _options.data_page_size; }
 
@@ -51,12 +54,13 @@ public:
 
     OwnedSlice finish() override;
 
-    void reset() override {
+    Status reset() override {
         _restart_points_offset.clear();
         _last_entry.clear();
         _count = 0;
         _buffer.clear();
         _finished = false;
+        return Status::OK();
     }
 
     uint64_t size() const override {
@@ -88,6 +92,8 @@ public:
     }
 
 private:
+    BinaryPrefixPageBuilder(const PageBuilderOptions& options) : 
_options(options) {}
+
     PageBuilderOptions _options;
     std::vector<uint32_t> _restart_points_offset;
     faststring _first_entry;
diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
index 0e0f5132294..004e53f3e5c 100644
--- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
@@ -84,12 +84,12 @@ void warn_with_bitshuffle_error(int64_t val);
 //    The header is followed by the bitshuffle-compressed element data.
 //
 template <FieldType Type>
-class BitshufflePageBuilder : public PageBuilder {
+class BitshufflePageBuilder : public 
PageBuilderHelper<BitshufflePageBuilder<Type>> {
 public:
-    BitshufflePageBuilder(const PageBuilderOptions& options)
-            : _options(options), _count(0), _remain_element_capacity(0), 
_finished(false) {
-        reset();
-    }
+    using Self = BitshufflePageBuilder<Type>;
+    friend class PageBuilderHelper<Self>;
+
+    Status init() override { return reset(); }
 
     bool is_page_full() override { return _remain_element_capacity == 0; }
 
@@ -149,7 +149,7 @@ public:
         return _finish(SIZE_OF_TYPE);
     }
 
-    void reset() override {
+    Status reset() override {
         auto block_size = _options.data_page_size;
         _count = 0;
         _data.clear();
@@ -160,6 +160,7 @@ public:
         _buffer.resize(BITSHUFFLE_PAGE_HEADER_SIZE);
         _finished = false;
         _remain_element_capacity = block_size / SIZE_OF_TYPE;
+        return Status::OK();
     }
 
     size_t count() const override { return _count; }
@@ -184,6 +185,9 @@ public:
     }
 
 private:
+    BitshufflePageBuilder(const PageBuilderOptions& options)
+            : _options(options), _count(0), _remain_element_capacity(0), 
_finished(false) {}
+
     OwnedSlice _finish(int final_size_of_type) {
         _data.resize(final_size_of_type * _count);
 
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp
index 08a94091229..cbda176acd6 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -718,7 +718,7 @@ Status ScalarColumnWriter::finish_current_page() {
     // build data page body : encoded values + [nullmap]
     std::vector<Slice> body;
     OwnedSlice encoded_values = _page_builder->finish();
-    _page_builder->reset();
+    RETURN_IF_ERROR(_page_builder->reset());
     body.push_back(encoded_values.slice());
 
     OwnedSlice nullmap;
diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp
index f10aba5cd3b..717304637dd 100644
--- a/be/src/olap/rowset/segment_v2/encoding_info.cpp
+++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp
@@ -51,8 +51,7 @@ struct TypeEncodingTraits {};
 template <FieldType type, typename CppType>
 struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> {
     static Status create_page_builder(const PageBuilderOptions& opts, 
PageBuilder** builder) {
-        *builder = new PlainPageBuilder<type>(opts);
-        return Status::OK();
+        return PlainPageBuilder<type>::create(builder, opts);
     }
     static Status create_page_decoder(const Slice& data, const 
PageDecoderOptions& opts,
                                       PageDecoder** decoder) {
@@ -64,8 +63,7 @@ struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> {
 template <FieldType type>
 struct TypeEncodingTraits<type, PLAIN_ENCODING, Slice> {
     static Status create_page_builder(const PageBuilderOptions& opts, 
PageBuilder** builder) {
-        *builder = new BinaryPlainPageBuilder<type>(opts);
-        return Status::OK();
+        return BinaryPlainPageBuilder<type>::create(builder, opts);
     }
     static Status create_page_decoder(const Slice& data, const 
PageDecoderOptions& opts,
                                       PageDecoder** decoder) {
@@ -78,8 +76,7 @@ template <FieldType type, typename CppType>
 struct TypeEncodingTraits<type, BIT_SHUFFLE, CppType,
                           typename std::enable_if<!std::is_same<CppType, 
Slice>::value>::type> {
     static Status create_page_builder(const PageBuilderOptions& opts, 
PageBuilder** builder) {
-        *builder = new BitshufflePageBuilder<type>(opts);
-        return Status::OK();
+        return BitshufflePageBuilder<type>::create(builder, opts);
     }
     static Status create_page_decoder(const Slice& data, const 
PageDecoderOptions& opts,
                                       PageDecoder** decoder) {
@@ -91,8 +88,7 @@ struct TypeEncodingTraits<type, BIT_SHUFFLE, CppType,
 template <>
 struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_BOOL, RLE, bool> {
     static Status create_page_builder(const PageBuilderOptions& opts, 
PageBuilder** builder) {
-        *builder = new RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL>(opts);
-        return Status::OK();
+        return 
RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL>::create(builder, opts);
     }
     static Status create_page_decoder(const Slice& data, const 
PageDecoderOptions& opts,
                                       PageDecoder** decoder) {
@@ -104,8 +100,7 @@ struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_BOOL, RLE, bool> {
 template <FieldType type>
 struct TypeEncodingTraits<type, DICT_ENCODING, Slice> {
     static Status create_page_builder(const PageBuilderOptions& opts, 
PageBuilder** builder) {
-        *builder = new BinaryDictPageBuilder(opts);
-        return Status::OK();
+        return BinaryDictPageBuilder::create(builder, opts);
     }
     static Status create_page_decoder(const Slice& data, const 
PageDecoderOptions& opts,
                                       PageDecoder** decoder) {
@@ -118,8 +113,7 @@ template <>
 struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATE, FOR_ENCODING,
                           typename 
CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::CppType> {
     static Status create_page_builder(const PageBuilderOptions& opts, 
PageBuilder** builder) {
-        *builder = new 
FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATE>(opts);
-        return Status::OK();
+        return 
FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATE>::create(builder, 
opts);
     }
     static Status create_page_decoder(const Slice& data, const 
PageDecoderOptions& opts,
                                       PageDecoder** decoder) {
@@ -132,8 +126,8 @@ template <>
 struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATEV2, FOR_ENCODING,
                           typename 
CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATEV2>::CppType> {
     static Status create_page_builder(const PageBuilderOptions& opts, 
PageBuilder** builder) {
-        *builder = new 
FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATEV2>(opts);
-        return Status::OK();
+        return 
FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATEV2>::create(builder,
+                                                                               
       opts);
     }
     static Status create_page_decoder(const Slice& data, const 
PageDecoderOptions& opts,
                                       PageDecoder** decoder) {
@@ -146,8 +140,8 @@ template <>
 struct TypeEncodingTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2, FOR_ENCODING,
                           typename 
CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>::CppType> {
     static Status create_page_builder(const PageBuilderOptions& opts, 
PageBuilder** builder) {
-        *builder = new 
FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>(opts);
-        return Status::OK();
+        return 
FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>::create(builder,
+                                                                               
           opts);
     }
     static Status create_page_decoder(const Slice& data, const 
PageDecoderOptions& opts,
                                       PageDecoder** decoder) {
@@ -161,8 +155,7 @@ template <FieldType type, typename CppType>
 struct TypeEncodingTraits<type, FOR_ENCODING, CppType,
                           typename 
std::enable_if<std::is_integral<CppType>::value>::type> {
     static Status create_page_builder(const PageBuilderOptions& opts, 
PageBuilder** builder) {
-        *builder = new FrameOfReferencePageBuilder<type>(opts);
-        return Status::OK();
+        return FrameOfReferencePageBuilder<type>::create(builder, opts);
     }
     static Status create_page_decoder(const Slice& data, const 
PageDecoderOptions& opts,
                                       PageDecoder** decoder) {
@@ -174,8 +167,7 @@ struct TypeEncodingTraits<type, FOR_ENCODING, CppType,
 template <FieldType type>
 struct TypeEncodingTraits<type, PREFIX_ENCODING, Slice> {
     static Status create_page_builder(const PageBuilderOptions& opts, 
PageBuilder** builder) {
-        *builder = new BinaryPrefixPageBuilder(opts);
-        return Status::OK();
+        return BinaryPrefixPageBuilder::create(builder, opts);
     }
     static Status create_page_decoder(const Slice& data, const 
PageDecoderOptions& opts,
                                       PageDecoder** decoder) {
diff --git a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h
index 64e57afa988..4477912803b 100644
--- a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h
+++ b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h
@@ -27,11 +27,14 @@ namespace segment_v2 {
 
 // Encode page use frame-of-reference coding
 template <FieldType Type>
-class FrameOfReferencePageBuilder : public PageBuilder {
+class FrameOfReferencePageBuilder : public 
PageBuilderHelper<FrameOfReferencePageBuilder<Type>> {
 public:
-    explicit FrameOfReferencePageBuilder(const PageBuilderOptions& options)
-            : _options(options), _count(0), _finished(false) {
+    using Self = FrameOfReferencePageBuilder<Type>;
+    friend class PageBuilderHelper<Self>;
+
+    Status init() override {
         _encoder.reset(new ForEncoder<CppType>(&_buf));
+        return Status::OK();
     }
 
     bool is_page_full() override { return _encoder->len() >= 
_options.data_page_size; }
@@ -58,10 +61,11 @@ public:
         return _buf.build();
     }
 
-    void reset() override {
+    Status reset() override {
         _count = 0;
         _finished = false;
         _encoder->clear();
+        return Status::OK();
     }
 
     size_t count() const override { return _count; }
@@ -85,6 +89,9 @@ public:
     }
 
 private:
+    explicit FrameOfReferencePageBuilder(const PageBuilderOptions& options)
+            : _options(options), _count(0), _finished(false) {}
+
     typedef typename TypeTraits<Type>::CppType CppType;
     PageBuilderOptions _options;
     size_t _count;
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
index 003670796c2..e1b238084a9 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
@@ -118,7 +118,7 @@ Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) {
 
     // IndexedColumn doesn't have NULLs, thus data page body only contains 
encoded values
     OwnedSlice page_body = _data_page_builder->finish();
-    _data_page_builder->reset();
+    RETURN_IF_ERROR(_data_page_builder->reset());
 
     PageFooterPB footer;
     footer.set_type(DATA_PAGE);
diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h
index 5df2ce949b9..61fa2eaf8e1 100644
--- a/be/src/olap/rowset/segment_v2/page_builder.h
+++ b/be/src/olap/rowset/segment_v2/page_builder.h
@@ -19,6 +19,7 @@
 
 #include <stdint.h>
 
+#include <memory>
 #include <vector>
 
 #include "common/status.h"
@@ -42,6 +43,9 @@ public:
 
     virtual ~PageBuilder() {}
 
+    // Init the internal state of the page builder.
+    virtual Status init() = 0;
+
     // Used by column writer to determine whether the current page is full.
     // Column writer depends on the result to decide whether to flush current page.
     virtual bool is_page_full() = 0;
@@ -69,7 +73,7 @@ public:
     // Reset the internal state of the page builder.
     //
     // Any data previously returned by finish may be invalidated by this call.
-    virtual void reset() = 0;
+    virtual Status reset() = 0;
 
     // Return the number of entries that have been added to the page.
     virtual size_t count() const = 0;
@@ -91,5 +95,17 @@ private:
     DISALLOW_COPY_AND_ASSIGN(PageBuilder);
 };
 
+template <typename Derived>
+class PageBuilderHelper : public PageBuilder {
+public:
+    template <typename... Args>
+    static Status create(PageBuilder** builder, Args&&... args) {
+        std::unique_ptr<PageBuilder> builder_uniq_ptr(new Derived(std::forward<Args>(args)...));
+        RETURN_IF_ERROR(builder_uniq_ptr->init());
+        *builder = builder_uniq_ptr.release();
+        return Status::OK();
+    }
+};
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/plain_page.h b/be/src/olap/rowset/segment_v2/plain_page.h
index cb9236ee315..cbcc96f31ba 100644
--- a/be/src/olap/rowset/segment_v2/plain_page.h
+++ b/be/src/olap/rowset/segment_v2/plain_page.h
@@ -31,13 +31,16 @@ namespace segment_v2 {
 static const size_t PLAIN_PAGE_HEADER_SIZE = sizeof(uint32_t);
 
 template <FieldType Type>
-class PlainPageBuilder : public PageBuilder {
+class PlainPageBuilder : public PageBuilderHelper<PlainPageBuilder<Type> > {
 public:
-    PlainPageBuilder(const PageBuilderOptions& options) : _options(options) {
+    using Self = PlainPageBuilder<Type>;
+    friend class PageBuilderHelper<Self>;
+
+    Status init() override {
         // Reserve enough space for the page, plus a bit of slop since
         // we often overrun the page by a few values.
         _buffer.reserve(_options.data_page_size + 1024);
-        reset();
+        return reset();
     }
 
     bool is_page_full() override { return _buffer.size() > 
_options.data_page_size; }
@@ -66,11 +69,12 @@ public:
         return _buffer.build();
     }
 
-    void reset() override {
+    Status reset() override {
         _buffer.reserve(_options.data_page_size + 1024);
         _count = 0;
         _buffer.clear();
         _buffer.resize(PLAIN_PAGE_HEADER_SIZE);
+        return Status::OK();
     }
 
     size_t count() const override { return _count; }
@@ -94,6 +98,8 @@ public:
     }
 
 private:
+    PlainPageBuilder(const PageBuilderOptions& options) : _options(options) {}
+
     faststring _buffer;
     PageBuilderOptions _options;
     size_t _count;
diff --git a/be/src/olap/rowset/segment_v2/rle_page.h b/be/src/olap/rowset/segment_v2/rle_page.h
index bdc94a7080a..40ec587743c 100644
--- a/be/src/olap/rowset/segment_v2/rle_page.h
+++ b/be/src/olap/rowset/segment_v2/rle_page.h
@@ -51,10 +51,12 @@ enum { RLE_PAGE_HEADER_SIZE = 4 };
 //
 // TODO(hkp): optimize rle algorithm
 template <FieldType Type>
-class RlePageBuilder : public PageBuilder {
+class RlePageBuilder : public PageBuilderHelper<RlePageBuilder<Type> > {
 public:
-    RlePageBuilder(const PageBuilderOptions& options)
-            : _options(options), _count(0), _finished(false), _bit_width(0), 
_rle_encoder(nullptr) {
+    using Self = RlePageBuilder<Type>;
+    friend class PageBuilderHelper<Self>;
+
+    Status init() override {
         switch (Type) {
         case FieldType::OLAP_FIELD_TYPE_BOOL: {
             _bit_width = 1;
@@ -66,7 +68,7 @@ public:
         }
         }
         _rle_encoder = new RleEncoder<CppType>(&_buf, _bit_width);
-        reset();
+        return reset();
     }
 
     ~RlePageBuilder() { delete _rle_encoder; }
@@ -102,11 +104,12 @@ public:
         return _buf.build();
     }
 
-    void reset() override {
+    Status reset() override {
         _count = 0;
         _finished = false;
         _rle_encoder->Clear();
         _rle_encoder->Reserve(RLE_PAGE_HEADER_SIZE, 0);
+        return Status::OK();
     }
 
     size_t count() const override { return _count; }
@@ -132,6 +135,13 @@ public:
     }
 
 private:
+    RlePageBuilder(const PageBuilderOptions& options)
+            : _options(options),
+              _count(0),
+              _finished(false),
+              _bit_width(0),
+              _rle_encoder(nullptr) {}
+
     typedef typename TypeTraits<Type>::CppType CppType;
     enum { SIZE_OF_TYPE = TypeTraits<Type>::size };
 
diff --git a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
index 87097fe1aa1..e1b40508485 100644
--- a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
@@ -43,6 +43,8 @@ public:
         options.data_page_size = 256 * 1024;
         options.dict_page_size = 256 * 1024;
         BinaryDictPageBuilder page_builder(options);
+        Status ret0 = page_builder.init();
+        EXPECT_TRUE(ret0.ok());
         size_t count = slices.size();
 
         const Slice* ptr = &slices[0];
@@ -132,6 +134,8 @@ public:
         options.data_page_size = 1 * 1024 * 1024;
         options.dict_page_size = 1 * 1024 * 1024;
         BinaryDictPageBuilder page_builder(options);
+        Status ret0 = page_builder.init();
+        EXPECT_TRUE(ret0.ok());
         size_t count = contents.size();
         std::vector<OwnedSlice> results;
         std::vector<size_t> page_start_ids;
diff --git a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
index 45d824c731e..7a7cf7aa23c 100644
--- a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
@@ -47,6 +47,8 @@ public:
         PageBuilderOptions options;
         options.data_page_size = 256 * 1024;
         PageBuilderType page_builder(options);
+        Status ret0 = page_builder.init();
+        EXPECT_TRUE(ret0.ok());
         size_t count = slices.size();
 
         Slice* ptr = &slices[0];
diff --git a/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
index e6aefd49e16..e87c15fff09 100644
--- a/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
@@ -48,6 +48,8 @@ public:
         // encode
         PageBuilderOptions options;
         BinaryPrefixPageBuilder page_builder(options);
+        Status ret0 = page_builder.init();
+        EXPECT_TRUE(ret0.ok());
 
         size_t count = slices.size();
         const Slice* ptr = &slices[0];
@@ -162,6 +164,8 @@ public:
         // encode
         PageBuilderOptions options;
         BinaryPrefixPageBuilder page_builder(options);
+        Status ret0 = page_builder.init();
+        EXPECT_TRUE(ret0.ok());
 
         size_t count = slices.size();
         const Slice* ptr = &slices[0];
@@ -273,6 +277,8 @@ public:
         // encode
         PageBuilderOptions options;
         BinaryPrefixPageBuilder page_builder(options);
+        Status ret0 = page_builder.init();
+        EXPECT_TRUE(ret0.ok());
 
         size_t count = slices.size();
         const Slice* ptr = &slices[0];
diff --git a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
index 75d09363ee2..2165e9d19f0 100644
--- a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
@@ -56,6 +56,8 @@ public:
         PageBuilderOptions options;
         options.data_page_size = 256 * 1024;
         PageBuilderType page_builder(options);
+        Status ret0 = page_builder.init();
+        EXPECT_TRUE(ret0.ok());
 
         page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
         OwnedSlice s = page_builder.finish();
@@ -121,6 +123,8 @@ public:
         PageBuilderOptions options;
         options.data_page_size = 256 * 1024;
         PageBuilderType page_builder(options);
+        Status ret0 = page_builder.init();
+        EXPECT_TRUE(ret0.ok());
 
         page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
         OwnedSlice s = page_builder.finish();
diff --git a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
index 500d638aa69..f2bbd87a9b7 100644
--- a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
@@ -50,6 +50,8 @@ public:
         PageBuilderOptions builder_options;
         builder_options.data_page_size = 256 * 1024;
         PageBuilderType for_page_builder(builder_options);
+        Status ret0 = for_page_builder.init();
+        EXPECT_TRUE(ret0.ok());
         for_page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
         OwnedSlice s = for_page_builder.finish();
         EXPECT_EQ(size, for_page_builder.count());
@@ -234,6 +236,8 @@ TEST_F(FrameOfReferencePageTest, TestInt32SequenceBlockEncoderSize) {
     builder_options.data_page_size = 256 * 1024;
     segment_v2::FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> 
page_builder(
             builder_options);
+    Status ret0 = page_builder.init();
+    EXPECT_TRUE(ret0.ok());
     page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size);
     OwnedSlice s = page_builder.finish();
     // body: 4 bytes min value + 128 * 1 /8 packing value = 20
@@ -251,6 +255,8 @@ TEST_F(FrameOfReferencePageTest, TestFirstLastValue) {
     builder_options.data_page_size = 256 * 1024;
     segment_v2::FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> 
page_builder(
             builder_options);
+    Status ret0 = page_builder.init();
+    EXPECT_TRUE(ret0.ok());
     page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size);
     OwnedSlice s = page_builder.finish();
     int32_t first_value = -1;
@@ -271,6 +277,8 @@ TEST_F(FrameOfReferencePageTest, TestInt32NormalBlockEncoderSize) {
     builder_options.data_page_size = 256 * 1024;
     segment_v2::FrameOfReferencePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> 
page_builder(
             builder_options);
+    Status ret0 = page_builder.init();
+    EXPECT_TRUE(ret0.ok());
     page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size);
     OwnedSlice s = page_builder.finish();
     // body: 4 bytes min value + 128 * 7 /8 packing value = 116
diff --git a/be/test/olap/rowset/segment_v2/plain_page_test.cpp b/be/test/olap/rowset/segment_v2/plain_page_test.cpp
index bd18215522c..cd1e3163416 100644
--- a/be/test/olap/rowset/segment_v2/plain_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/plain_page_test.cpp
@@ -64,6 +64,8 @@ public:
         PageBuilderOptions options;
         options.data_page_size = 256 * 1024;
         PageBuilderType page_builder(options);
+        Status ret0 = page_builder.init();
+        EXPECT_TRUE(ret0.ok());
 
         page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
         OwnedSlice s = page_builder.finish();
@@ -120,6 +122,8 @@ public:
         PageBuilderOptions options;
         options.data_page_size = 256 * 1024;
         PageBuilderType page_builder(options);
+        Status ret0 = page_builder.init();
+        EXPECT_TRUE(ret0.ok());
 
         page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
         OwnedSlice s = page_builder.finish();
diff --git a/be/test/olap/rowset/segment_v2/rle_page_test.cpp b/be/test/olap/rowset/segment_v2/rle_page_test.cpp
index ae7996f4a43..c97d70f9ac6 100644
--- a/be/test/olap/rowset/segment_v2/rle_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/rle_page_test.cpp
@@ -54,6 +54,8 @@ public:
         PageBuilderOptions builder_options;
         builder_options.data_page_size = 256 * 1024;
         PageBuilderType rle_page_builder(builder_options);
+        Status ret0 = rle_page_builder.init();
+        EXPECT_TRUE(ret0.ok());
         rle_page_builder.add(reinterpret_cast<const uint8_t*>(src), &size);
         OwnedSlice s = rle_page_builder.finish();
         EXPECT_EQ(size, rle_page_builder.count());
@@ -155,6 +157,8 @@ TEST_F(RlePageTest, TestRleInt32BlockEncoderSize) {
     PageBuilderOptions builder_options;
     builder_options.data_page_size = 256 * 1024;
     segment_v2::RlePageBuilder<FieldType::OLAP_FIELD_TYPE_INT> 
rle_page_builder(builder_options);
+    Status ret0 = rle_page_builder.init();
+    EXPECT_TRUE(ret0.ok());
     rle_page_builder.add(reinterpret_cast<const uint8_t*>(ints.get()), &size);
     OwnedSlice s = rle_page_builder.finish();
     // 4 bytes header
@@ -191,6 +195,8 @@ TEST_F(RlePageTest, TestRleBoolBlockEncoderSize) {
     PageBuilderOptions builder_options;
     builder_options.data_page_size = 256 * 1024;
     segment_v2::RlePageBuilder<FieldType::OLAP_FIELD_TYPE_BOOL> 
rle_page_builder(builder_options);
+    Status ret0 = rle_page_builder.init();
+    EXPECT_TRUE(ret0.ok());
     rle_page_builder.add(reinterpret_cast<const uint8_t*>(bools.get()), &size);
     OwnedSlice s = rle_page_builder.finish();
     // 4 bytes header


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org

Reply via email to