This is an automated email from the ASF dual-hosted git repository.
wangbo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new fb3c00c943 [Improvement](storage) reuse schema and rowblockv2 on
single scanner_thread (#11392)
fb3c00c943 is described below
commit fb3c00c9434e5b09e083206434e8662789be5df7
Author: Userwhite <[email protected]>
AuthorDate: Wed Aug 24 13:42:10 2022 +0800
[Improvement](storage) reuse schema and rowblockv2 on single scanner_thread
(#11392)
* support reuse rowblockv2 on single thread
---
be/src/olap/rowset/beta_rowset_reader.cpp | 54 +++++++++++++++++++++++-------
be/src/olap/rowset/beta_rowset_reader.h | 4 +--
be/src/olap/rowset/rowset_reader_context.h | 2 ++
3 files changed, 45 insertions(+), 15 deletions(-)
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp
b/be/src/olap/rowset/beta_rowset_reader.cpp
index e1276d3fa8..51b74c2525 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -59,27 +59,46 @@ Status BetaRowsetReader::init(RowsetReaderContext*
read_context) {
read_context->is_upper_keys_included->at(i));
}
}
+
+ bool can_reuse_schema = true;
// delete_handler is always set, but it may not be initialized, so it will return empty conditions
// or predicates when it is not initialized.
if (read_context->delete_handler != nullptr) {
read_context->delete_handler->get_delete_conditions_after_version(
_rowset->end_version(), &read_options.delete_conditions,
read_options.delete_condition_predicates.get());
+ // if the delete conditions are not empty, the schema may differ between rowsets
+ can_reuse_schema = read_options.delete_conditions.empty();
}
- std::vector<uint32_t> read_columns;
- std::set<uint32_t> read_columns_set;
- std::set<uint32_t> delete_columns_set;
- for (int i = 0; i < _context->return_columns->size(); ++i) {
- read_columns.push_back(_context->return_columns->at(i));
- read_columns_set.insert(_context->return_columns->at(i));
- }
-
read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
- for (auto cid : delete_columns_set) {
- if (read_columns_set.find(cid) == read_columns_set.end()) {
- read_columns.push_back(cid);
+
+ if (!can_reuse_schema || _context->reuse_input_schema == nullptr) {
+ std::vector<uint32_t> read_columns;
+ std::set<uint32_t> read_columns_set;
+ std::set<uint32_t> delete_columns_set;
+ for (int i = 0; i < _context->return_columns->size(); ++i) {
+ read_columns.push_back(_context->return_columns->at(i));
+ read_columns_set.insert(_context->return_columns->at(i));
+ }
+
read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
+ for (auto cid : delete_columns_set) {
+ if (read_columns_set.find(cid) == read_columns_set.end()) {
+ read_columns.push_back(cid);
+ }
+ }
+ _input_schema =
std::make_shared<Schema>(_context->tablet_schema->columns(), read_columns);
+
+ if (can_reuse_schema) {
+ _context->reuse_input_schema = _input_schema;
}
}
- _input_schema =
std::make_unique<Schema>(_context->tablet_schema->columns(), read_columns);
+
+ // if the schema can be reused, the context must have reuse_input_schema
+ // if the schema can't be reused, the context mustn't have reuse_input_schema
+ DCHECK(can_reuse_schema ^ (_context->reuse_input_schema == nullptr));
+ if (_context->reuse_input_schema != nullptr && _input_schema == nullptr) {
+ _input_schema = _context->reuse_input_schema;
+ }
+
if (read_context->predicates != nullptr) {
read_options.column_predicates.insert(read_options.column_predicates.end(),
read_context->predicates->begin(),
@@ -171,7 +190,16 @@ Status BetaRowsetReader::init(RowsetReaderContext*
read_context) {
_iterator.reset(final_iterator);
// init input block
- _input_block.reset(new RowBlockV2(*_input_schema, std::min(1024,
read_context->batch_size)));
+ if (can_reuse_schema) {
+ if (read_context->reuse_block == nullptr) {
+ read_context->reuse_block.reset(
+ new RowBlockV2(*_input_schema, std::min(1024,
read_context->batch_size)));
+ }
+ _input_block = read_context->reuse_block;
+ } else {
+ _input_block.reset(
+ new RowBlockV2(*_input_schema, std::min(1024,
read_context->batch_size)));
+ }
if (!read_context->is_vec) {
// init input/output block and row
diff --git a/be/src/olap/rowset/beta_rowset_reader.h
b/be/src/olap/rowset/beta_rowset_reader.h
index 9cf9ef599b..b987efc9ad 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -66,7 +66,7 @@ public:
private:
bool _should_push_down_value_predicates() const;
- std::unique_ptr<Schema> _input_schema;
+ std::shared_ptr<Schema> _input_schema;
RowsetReaderContext* _context;
BetaRowsetSharedPtr _rowset;
@@ -75,7 +75,7 @@ private:
std::unique_ptr<RowwiseIterator> _iterator;
- std::unique_ptr<RowBlockV2> _input_block;
+ std::shared_ptr<RowBlockV2> _input_block;
std::unique_ptr<RowBlock> _output_block;
std::unique_ptr<RowCursor> _row;
diff --git a/be/src/olap/rowset/rowset_reader_context.h
b/be/src/olap/rowset/rowset_reader_context.h
index dcb56f8d6a..effa319d30 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -73,6 +73,8 @@ struct RowsetReaderContext {
bool enable_unique_key_merge_on_write = false;
const DeleteBitmap* delete_bitmap = nullptr;
bool record_rowids = false;
+ std::shared_ptr<RowBlockV2> reuse_block;
+ std::shared_ptr<Schema> reuse_input_schema;
};
} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]