This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new e217aab8b3a [fix](compaction) fix mismatch between segment key and value column rows during compaction (#37960) (#38251) (#38356) (#39263)
e217aab8b3a is described below

commit e217aab8b3a04ae079de556338e5dbb4d5aece22
Author: Luwei <[email protected]>
AuthorDate: Wed Aug 14 09:31:15 2024 +0800

    [fix](compaction) fix mismatch between segment key and value column rows during compaction (#37960) (#38251) (#38356) (#39263)
    
    Cherry-pick of master PRs #37960, #38251 and #38356.
---
 be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 55 ++++++++++------------
 1 file changed, 26 insertions(+), 29 deletions(-)

diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp 
b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
index 05730ec9f3a..31b9e94f732 100644
--- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -95,35 +95,32 @@ Status VerticalBetaRowsetWriter::add_columns(const 
vectorized::Block* block,
         RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 
0, num_rows));
     } else {
         // value columns
-        uint32_t num_rows_written = 
_segment_writers[_cur_writer_idx]->num_rows_written();
-        VLOG_NOTICE << "num_rows_written: " << num_rows_written
-                    << ", _cur_writer_idx: " << _cur_writer_idx;
-        uint32_t num_rows_key_group = 
_segment_writers[_cur_writer_idx]->row_count();
-        // init if it's first value column write in current segment
-        if (_cur_writer_idx == 0 && num_rows_written == 0) {
-            VLOG_NOTICE << "init first value column segment writer";
-            RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, 
is_key));
-        }
-        // when splitting segment, need to make rows align between key columns 
and value columns
-        size_t start_offset = 0, limit = num_rows;
-        if (num_rows_written + num_rows >= num_rows_key_group &&
-            _cur_writer_idx < _segment_writers.size() - 1) {
-            RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(
-                    block, 0, num_rows_key_group - num_rows_written));
-            
RETURN_IF_ERROR(_flush_columns(&_segment_writers[_cur_writer_idx]));
-            start_offset = num_rows_key_group - num_rows_written;
-            limit = num_rows - start_offset;
-            ++_cur_writer_idx;
-            // switch to next writer
-            RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, 
is_key));
-            num_rows_written = 0;
-            num_rows_key_group = 
_segment_writers[_cur_writer_idx]->row_count();
-        }
-        if (limit > 0) {
-            RETURN_IF_ERROR(
-                    _segment_writers[_cur_writer_idx]->append_block(block, 
start_offset, limit));
-            DCHECK(_segment_writers[_cur_writer_idx]->num_rows_written() <=
-                   _segment_writers[_cur_writer_idx]->row_count());
+        int64_t left = num_rows;
+        while (left > 0) {
+            uint32_t num_rows_written = 
_segment_writers[_cur_writer_idx]->num_rows_written();
+            VLOG_NOTICE << "num_rows_written: " << num_rows_written
+                        << ", _cur_writer_idx: " << _cur_writer_idx;
+            uint32_t num_rows_key_group = 
_segment_writers[_cur_writer_idx]->row_count();
+            CHECK_LT(num_rows_written, num_rows_key_group);
+            // init if it's first value column write in current segment
+            if (num_rows_written == 0) {
+                VLOG_NOTICE << "init first value column segment writer";
+                
RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key));
+            }
+
+            int64_t to_write = num_rows_written + left >= num_rows_key_group
+                                       ? num_rows_key_group - num_rows_written
+                                       : left;
+            
RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, num_rows 
- left,
+                                                                            
to_write));
+            left -= to_write;
+            CHECK_GE(left, 0);
+
+            if (num_rows_key_group == num_rows_written + to_write &&
+                _cur_writer_idx < _segment_writers.size() - 1) {
+                
RETURN_IF_ERROR(_flush_columns(&_segment_writers[_cur_writer_idx]));
+                ++_cur_writer_idx;
+            }
         }
     }
     if (is_key) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to