This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch compaction_opt
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/compaction_opt by this push:
new 1fc3947d9b [bugfix](ordered_compaction) compatible for old rowset
without segment key bounds (#14357)
1fc3947d9b is described below
commit 1fc3947d9b9c92d66d7075867f6a40d6114d66c3
Author: yixiutt <[email protected]>
AuthorDate: Thu Nov 17 15:31:55 2022 +0800
[bugfix](ordered_compaction) compatible for old rowset without segment key
bounds (#14357)
---
be/src/olap/compaction.cpp | 10 +++++++---
be/src/olap/rowset/rowset.h | 20 ++++++++++++++------
be/src/olap/rowset/rowset_meta.h | 15 +++++++++++----
be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 14 ++++++++++++--
be/src/olap/rowset/vertical_beta_rowset_writer.h | 3 ++-
5 files changed, 46 insertions(+), 16 deletions(-)
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index a79ef6ed56..ca477e7f0c 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -120,12 +120,16 @@ bool Compaction::is_rowset_tidy(std::string& pre_max_key,
const RowsetSharedPtr&
return false;
}
}
-
- auto min_key = rhs->min_key();
+ std::string min_key;
+ auto ret = rhs->min_key(&min_key);
+ if (!ret) {
+ return false;
+ }
if (min_key < pre_max_key) {
return false;
}
- pre_max_key = rhs->max_key();
+ DCHECK(rhs->max_key(&pre_max_key));
+
return true;
}
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index 312fe62b1a..1e06b4b425 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -267,15 +267,23 @@ public:
_rowset_meta->get_segments_key_bounds(segments_key_bounds);
return Status::OK();
}
- std::string min_key() {
+ bool min_key(std::string* min_key) {
KeyBoundsPB key_bounds;
- _rowset_meta->get_first_segment_key_bound(&key_bounds);
- return key_bounds.min_key();
+ bool ret = _rowset_meta->get_first_segment_key_bound(&key_bounds);
+ if (!ret) {
+ return false;
+ }
+ *min_key = key_bounds.min_key();
+ return true;
}
- std::string max_key() {
+ bool max_key(std::string* max_key) {
KeyBoundsPB key_bounds;
- _rowset_meta->get_last_segment_key_bound(&key_bounds);
- return key_bounds.max_key();
+ bool ret = _rowset_meta->get_last_segment_key_bound(&key_bounds);
+ if (!ret) {
+ return false;
+ }
+ *max_key = key_bounds.max_key();
+ return true;
}
bool check_rowset_segment();
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index a4a7059d39..8e4741c3dd 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -320,14 +320,21 @@ public:
segments_key_bounds->push_back(key_range);
}
}
- virtual void get_first_segment_key_bound(KeyBoundsPB* key_bounds) {
- DCHECK(_rowset_meta_pb.segments_key_bounds_size() > 0);
+ virtual bool get_first_segment_key_bound(KeyBoundsPB* key_bounds) {
+ // for compatibility, old version has not segment key bounds
+ if (_rowset_meta_pb.segments_key_bounds_size() == 0) {
+ return false;
+ }
*key_bounds = _rowset_meta_pb.segments_key_bounds(0);
+ return true;
}
- virtual void get_last_segment_key_bound(KeyBoundsPB* key_bounds) {
- DCHECK(_rowset_meta_pb.segments_key_bounds_size() > 0);
+ virtual bool get_last_segment_key_bound(KeyBoundsPB* key_bounds) {
+ if (_rowset_meta_pb.segments_key_bounds_size() == 0) {
+ return false;
+ }
*key_bounds =
_rowset_meta_pb.segments_key_bounds(_rowset_meta_pb.segments_key_bounds_size()
- 1);
+ return true;
}
void set_segments_key_bounds(const std::vector<KeyBoundsPB>&
segments_key_bounds) {
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index 2fb248f037..b6160e68ed 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -60,7 +60,7 @@ Status VerticalBetaRowsetWriter::add_columns(const
vectorized::Block* block,
} else if (is_key) {
if (_segment_writers[_cur_writer_idx]->num_rows_written() >
max_rows_per_segment) {
// segment is full, need flush columns and create new segment
writer
-
RETURN_IF_ERROR(_flush_columns(&_segment_writers[_cur_writer_idx]));
+ RETURN_IF_ERROR(_flush_columns(&_segment_writers[_cur_writer_idx],
true));
std::unique_ptr<segment_v2::SegmentWriter> writer;
RETURN_IF_ERROR(_create_segment_writer(col_ids, is_key, &writer));
_segment_writers.emplace_back(std::move(writer));
@@ -93,10 +93,20 @@ Status VerticalBetaRowsetWriter::add_columns(const
vectorized::Block* block,
}
Status VerticalBetaRowsetWriter::_flush_columns(
- std::unique_ptr<segment_v2::SegmentWriter>* segment_writer) {
+ std::unique_ptr<segment_v2::SegmentWriter>* segment_writer, bool
is_key) {
uint64_t index_size = 0;
VLOG_NOTICE << "flush columns index: " << _cur_writer_idx;
RETURN_IF_ERROR((*segment_writer)->finalize_columns(&index_size));
+ if (is_key) {
+ // record segment key bound
+ KeyBoundsPB key_bounds;
+ Slice min_key = (*segment_writer)->min_encoded_key();
+ Slice max_key = (*segment_writer)->max_encoded_key();
+ DCHECK_LE(min_key.compare(max_key), 0);
+ key_bounds.set_min_key(min_key.to_string());
+ key_bounds.set_max_key(max_key.to_string());
+ _segments_encoded_key_bounds.emplace_back(key_bounds);
+ }
_total_index_size += static_cast<int64_t>(index_size);
return Status::OK();
}
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.h
b/be/src/olap/rowset/vertical_beta_rowset_writer.h
index e935a89ce7..2c055ecb41 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.h
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.h
@@ -42,7 +42,8 @@ private:
Status _create_segment_writer(const std::vector<uint32_t>& column_ids,
bool is_key,
std::unique_ptr<segment_v2::SegmentWriter>*
writer);
- Status _flush_columns(std::unique_ptr<segment_v2::SegmentWriter>*
segment_writer);
+ Status _flush_columns(std::unique_ptr<segment_v2::SegmentWriter>*
segment_writer,
+ bool is_key = false);
private:
std::vector<std::unique_ptr<segment_v2::SegmentWriter>> _segment_writers;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]