This is an automated email from the ASF dual-hosted git repository.

wangbo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fb3c00c943 [Improvement](storage) reuse schema and rowblockv2 on single scanner_thread (#11392)
fb3c00c943 is described below

commit fb3c00c9434e5b09e083206434e8662789be5df7
Author: Userwhite <[email protected]>
AuthorDate: Wed Aug 24 13:42:10 2022 +0800

    [Improvement](storage) reuse schema and rowblockv2 on single scanner_thread (#11392)
    
    * support reuse rowblockv2 on single thread
---
 be/src/olap/rowset/beta_rowset_reader.cpp  | 54 +++++++++++++++++++++++-------
 be/src/olap/rowset/beta_rowset_reader.h    |  4 +--
 be/src/olap/rowset/rowset_reader_context.h |  2 ++
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index e1276d3fa8..51b74c2525 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -59,27 +59,46 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
                                                  
read_context->is_upper_keys_included->at(i));
         }
     }
+
+    bool can_reuse_schema = true;
     // delete_hanlder is always set, but it maybe not init, so that it will 
return empty conditions
     // or predicates when it is not inited.
     if (read_context->delete_handler != nullptr) {
         read_context->delete_handler->get_delete_conditions_after_version(
                 _rowset->end_version(), &read_options.delete_conditions,
                 read_options.delete_condition_predicates.get());
+        // if del cond is not empty, schema may be different in multiple rowset
+        can_reuse_schema = read_options.delete_conditions.empty();
     }
-    std::vector<uint32_t> read_columns;
-    std::set<uint32_t> read_columns_set;
-    std::set<uint32_t> delete_columns_set;
-    for (int i = 0; i < _context->return_columns->size(); ++i) {
-        read_columns.push_back(_context->return_columns->at(i));
-        read_columns_set.insert(_context->return_columns->at(i));
-    }
-    
read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
-    for (auto cid : delete_columns_set) {
-        if (read_columns_set.find(cid) == read_columns_set.end()) {
-            read_columns.push_back(cid);
+
+    if (!can_reuse_schema || _context->reuse_input_schema == nullptr) {
+        std::vector<uint32_t> read_columns;
+        std::set<uint32_t> read_columns_set;
+        std::set<uint32_t> delete_columns_set;
+        for (int i = 0; i < _context->return_columns->size(); ++i) {
+            read_columns.push_back(_context->return_columns->at(i));
+            read_columns_set.insert(_context->return_columns->at(i));
+        }
+        
read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
+        for (auto cid : delete_columns_set) {
+            if (read_columns_set.find(cid) == read_columns_set.end()) {
+                read_columns.push_back(cid);
+            }
+        }
+        _input_schema = 
std::make_shared<Schema>(_context->tablet_schema->columns(), read_columns);
+
+        if (can_reuse_schema) {
+            _context->reuse_input_schema = _input_schema;
         }
     }
-    _input_schema = 
std::make_unique<Schema>(_context->tablet_schema->columns(), read_columns);
+
+    // if can reuse schema, context must have reuse_input_schema
+    // if can't reuse schema, context mustn't have reuse_input_schema
+    DCHECK(can_reuse_schema ^ (_context->reuse_input_schema == nullptr));
+    if (_context->reuse_input_schema != nullptr && _input_schema == nullptr) {
+        _input_schema = _context->reuse_input_schema;
+    }
+
     if (read_context->predicates != nullptr) {
         
read_options.column_predicates.insert(read_options.column_predicates.end(),
                                               
read_context->predicates->begin(),
@@ -171,7 +190,16 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
     _iterator.reset(final_iterator);
 
     // init input block
-    _input_block.reset(new RowBlockV2(*_input_schema, std::min(1024, 
read_context->batch_size)));
+    if (can_reuse_schema) {
+        if (read_context->reuse_block == nullptr) {
+            read_context->reuse_block.reset(
+                    new RowBlockV2(*_input_schema, std::min(1024, 
read_context->batch_size)));
+        }
+        _input_block = read_context->reuse_block;
+    } else {
+        _input_block.reset(
+                new RowBlockV2(*_input_schema, std::min(1024, 
read_context->batch_size)));
+    }
 
     if (!read_context->is_vec) {
         // init input/output block and row
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index 9cf9ef599b..b987efc9ad 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -66,7 +66,7 @@ public:
 private:
     bool _should_push_down_value_predicates() const;
 
-    std::unique_ptr<Schema> _input_schema;
+    std::shared_ptr<Schema> _input_schema;
     RowsetReaderContext* _context;
     BetaRowsetSharedPtr _rowset;
 
@@ -75,7 +75,7 @@ private:
 
     std::unique_ptr<RowwiseIterator> _iterator;
 
-    std::unique_ptr<RowBlockV2> _input_block;
+    std::shared_ptr<RowBlockV2> _input_block;
     std::unique_ptr<RowBlock> _output_block;
     std::unique_ptr<RowCursor> _row;
 
diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h
index dcb56f8d6a..effa319d30 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -73,6 +73,8 @@ struct RowsetReaderContext {
     bool enable_unique_key_merge_on_write = false;
     const DeleteBitmap* delete_bitmap = nullptr;
     bool record_rowids = false;
+    std::shared_ptr<RowBlockV2> reuse_block;
+    std::shared_ptr<Schema> reuse_input_schema;
 };
 
 } // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to