This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7d2e431856a [fix](compaction) fix mismatch between segment key and
value column rows during compaction (#37960)
7d2e431856a is described below
commit 7d2e431856a495b6c573a74690d7a72b05462b46
Author: Luwei <[email protected]>
AuthorDate: Mon Jul 22 17:09:32 2024 +0800
[fix](compaction) fix mismatch between segment key and value column rows
during compaction (#37960)
When a block is split into 3 segments, the old code handles only 2 and the
last is overflowed.
---
be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 54 ++++++++++------------
1 file changed, 24 insertions(+), 30 deletions(-)
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index 1de7d4f50dc..942ced616fc 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -83,36 +83,30 @@ Status VerticalBetaRowsetWriter<T>::add_columns(const
vectorized::Block* block,
RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block,
0, num_rows));
} else {
// value columns
- uint32_t num_rows_written =
_segment_writers[_cur_writer_idx]->num_rows_written();
- VLOG_NOTICE << "num_rows_written: " << num_rows_written
- << ", _cur_writer_idx: " << _cur_writer_idx;
- uint32_t num_rows_key_group =
_segment_writers[_cur_writer_idx]->row_count();
- // init if it's first value column write in current segment
- if (_cur_writer_idx == 0 && num_rows_written == 0) {
- VLOG_NOTICE << "init first value column segment writer";
- RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids,
is_key));
- }
- // when splitting segment, need to make rows align between key columns
and value columns
- size_t start_offset = 0;
- size_t limit = num_rows;
- if (num_rows_written + num_rows >= num_rows_key_group &&
- _cur_writer_idx < _segment_writers.size() - 1) {
- RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(
- block, 0, num_rows_key_group - num_rows_written));
-
RETURN_IF_ERROR(_flush_columns(_segment_writers[_cur_writer_idx].get()));
- start_offset = num_rows_key_group - num_rows_written;
- limit = num_rows - start_offset;
- ++_cur_writer_idx;
- // switch to next writer
- RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids,
is_key));
- num_rows_written = 0;
- num_rows_key_group =
_segment_writers[_cur_writer_idx]->row_count();
- }
- if (limit > 0) {
- RETURN_IF_ERROR(
- _segment_writers[_cur_writer_idx]->append_block(block,
start_offset, limit));
- DCHECK(_segment_writers[_cur_writer_idx]->num_rows_written() <=
- _segment_writers[_cur_writer_idx]->row_count());
+ int64_t left = num_rows;
+ while (left > 0) {
+ uint32_t num_rows_written =
_segment_writers[_cur_writer_idx]->num_rows_written();
+ VLOG_NOTICE << "num_rows_written: " << num_rows_written
+ << ", _cur_writer_idx: " << _cur_writer_idx;
+ uint32_t num_rows_key_group =
_segment_writers[_cur_writer_idx]->row_count();
+ CHECK_LE(num_rows_written, num_rows_key_group);
+ // init if it's first value column write in current segment
+ if (num_rows_written == 0) {
+ VLOG_NOTICE << "init first value column segment writer";
+
RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key));
+ }
+
+ int64_t to_write = num_rows_written + left >= num_rows_key_group
+ ? num_rows_key_group - num_rows_written
+ : left;
+
RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, num_rows
- left,
+
to_write));
+ left -= to_write;
+ CHECK_GE(left, 0);
+
+ if (left > 0) {
+ ++_cur_writer_idx;
+ }
}
}
if (is_key) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]