This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new a986c181c2 [improvement](vertical compaction) cache segment in
vertical compaction (#16101)
a986c181c2 is described below
commit a986c181c2490fb405075a8bbee8fd7c1f2dd54f
Author: yixiutt <[email protected]>
AuthorDate: Fri Jan 20 16:38:23 2023 +0800
[improvement](vertical compaction) cache segment in vertical compaction
(#16101)
1. In vertical compaction, segments are loaded for every column group, so
we should cache the segment pointers to avoid too much repeated I/O.
2. Fix the vertical compaction data size bug.
---
be/src/olap/rowset/beta_rowset_reader.cpp | 10 ++++++----
be/src/olap/rowset/beta_rowset_reader.h | 3 ++-
be/src/olap/rowset/rowset_reader.h | 3 ++-
be/src/olap/rowset/segment_v2/segment_writer.cpp | 1 +
be/src/vec/olap/vertical_block_reader.cpp | 4 +++-
5 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp
b/be/src/olap/rowset/beta_rowset_reader.cpp
index eae3f403d1..8fb0fca4a9 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -45,7 +45,8 @@ void BetaRowsetReader::reset_read_options() {
}
Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext*
read_context,
- std::vector<RowwiseIterator*>*
out_iters) {
+ std::vector<RowwiseIterator*>*
out_iters,
+ bool use_cache) {
RETURN_NOT_OK(_rowset->load());
_context = read_context;
if (_context->stats != nullptr) {
@@ -162,9 +163,10 @@ Status
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
_read_options.io_ctx.reader_type = read_context->reader_type;
// load segments
- RETURN_NOT_OK(SegmentLoader::instance()->load_segments(
- _rowset, &_segment_cache_handle,
- read_context->reader_type == ReaderType::READER_QUERY));
+ // use_cache is true when doing vertical compaction
+ bool should_use_cache = use_cache || read_context->reader_type ==
ReaderType::READER_QUERY;
+ RETURN_NOT_OK(SegmentLoader::instance()->load_segments(_rowset,
&_segment_cache_handle,
+ should_use_cache));
// create iterator for each segment
std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators;
diff --git a/be/src/olap/rowset/beta_rowset_reader.h
b/be/src/olap/rowset/beta_rowset_reader.h
index e2f888cb12..d09cf5445f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -36,7 +36,8 @@ public:
Status init(RowsetReaderContext* read_context) override;
Status get_segment_iterators(RowsetReaderContext* read_context,
- std::vector<RowwiseIterator*>* out_iters)
override;
+ std::vector<RowwiseIterator*>* out_iters,
+ bool use_cache = false) override;
void reset_read_options() override;
// It's ok, because we only get ref here, the block's owner is this reader.
diff --git a/be/src/olap/rowset/rowset_reader.h
b/be/src/olap/rowset/rowset_reader.h
index a189ef73dc..98c423119a 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -45,7 +45,8 @@ public:
virtual Status init(RowsetReaderContext* read_context) = 0;
virtual Status get_segment_iterators(RowsetReaderContext* read_context,
- std::vector<RowwiseIterator*>*
out_iters) = 0;
+ std::vector<RowwiseIterator*>*
out_iters,
+ bool use_cache = false) = 0;
virtual void reset_read_options() = 0;
// read next block data into *block.
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 1efe66f97a..025bfed484 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -382,6 +382,7 @@ Status SegmentWriter::finalize_columns(uint64_t*
index_size) {
Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) {
RETURN_IF_ERROR(_write_footer());
+ // finish
RETURN_IF_ERROR(_file_writer->finalize());
*segment_file_size = _file_writer->bytes_appended();
return Status::OK();
diff --git a/be/src/vec/olap/vertical_block_reader.cpp
b/be/src/vec/olap/vertical_block_reader.cpp
index 1a1271b047..42abc8a898 100644
--- a/be/src/vec/olap/vertical_block_reader.cpp
+++ b/be/src/vec/olap/vertical_block_reader.cpp
@@ -53,7 +53,9 @@ Status VerticalBlockReader::_get_segment_iterators(const
ReaderParams& read_para
_reader_context.is_vertical_compaction = true;
for (auto& rs_reader : rs_readers) {
// segment iterator will be inited here
- RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context,
segment_iters));
+ // In vertical compaction, every column group loads the segments, so we should
cache
+ // the segments to avoid too many S3 HEAD requests
+ RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context,
segment_iters, true));
// if segments overlapping, all segment iterator should be inited in
// heap merge iterator. If segments are none overlapping, only first
segment of this
// rowset will be inited and push to heap, other segment will be
inited later when current
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]