This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit d3bdda6071d66b576b49d85bf2ba26f24e6c1524 Author: zhannngchen <[email protected]> AuthorDate: Thu Mar 21 20:06:53 2024 +0800 [fix](partial update) fix data correctness risk when load delete sign data into a table with sequence col (#32574) --- be/src/olap/rowset/segment_v2/segment_writer.cpp | 12 +++++++----- .../rowset/segment_v2/vertical_segment_writer.cpp | 12 +++++++----- .../test_partial_update_seq_col_delete.out | Bin 1064 -> 1526 bytes .../test_partial_update_seq_type_delete.out | Bin 3017 -> 2967 bytes .../test_partial_update_seq_col_delete.groovy | 8 ++++++++ 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 990fed3f285..6228e781f79 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -537,10 +537,12 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* return st; } - // if the delete sign is marked, it means that the value columns of the row - // will not be read. So we don't need to read the missing values from the previous rows. - // But we still need to mark the previous row on delete bitmap - if (have_delete_sign) { + // 1. if the delete sign is marked, it means that the value columns of the row will not + // be read. So we don't need to read the missing values from the previous rows. + // 2. the one exception is when there are sequence columns in the table, we need to read + // the sequence columns, otherwise it may cause the merge-on-read based compaction + // policy to produce incorrect results + if (have_delete_sign && !_tablet_schema->has_sequence_col()) { has_default_or_nullable = true; use_default_or_null_flag.emplace_back(true); } else { @@ -726,7 +728,7 @@ Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f (delete_sign_column_data != nullptr && delete_sign_column_data[read_index[idx + segment_start_pos]] != 0)) { for (auto i = 0; i < cids_missing.size(); ++i) { - // if the column has default value, fiil it with default value + // if the column has default value, fill it with default value // otherwise, if the column is nullable, fill it with null value const auto& tablet_column = _tablet_schema->column(cids_missing[i]); if (tablet_column.has_default_value()) { diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index f60c2eb2593..4b48dd959a6 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -468,10 +468,12 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da return st; } - // if the delete sign is marked, it means that the value columns of the row - // will not be read. So we don't need to read the missing values from the previous rows. - // But we still need to mark the previous row on delete bitmap - if (have_delete_sign) { + // 1. if the delete sign is marked, it means that the value columns of the row will not + // be read. So we don't need to read the missing values from the previous rows. + // 2. the one exception is when there are sequence columns in the table, we need to read + // the sequence columns, otherwise it may cause the merge-on-read based compaction + // policy to produce incorrect results + if (have_delete_sign && !_tablet_schema->has_sequence_col()) { has_default_or_nullable = true; use_default_or_null_flag.emplace_back(true); } else { @@ -656,7 +658,7 @@ Status VerticalSegmentWriter::_fill_missing_columns( (delete_sign_column_data != nullptr && delete_sign_column_data[read_index[idx + segment_start_pos]] != 0)) { for (auto i = 0; i < missing_cids.size(); ++i) { - // if the column has default value, fiil it with default value + // if the column has default value, fill it with default value // otherwise, if the column is nullable, fill it with null value const auto& tablet_column = _tablet_schema->column(missing_cids[i]); if (tablet_column.has_default_value()) { diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out index a59dfbf5203..fe5746c41b7 100644 Binary files a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out and b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out differ diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out index 80f580661fc..05f0c5dab4e 100644 Binary files a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out and b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out differ diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy index c6cfdba1106..dca646948b4 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy @@ -80,6 +80,14 @@ suite("test_primary_key_partial_update_seq_col_delete", "p0") { select * from ${tableName} order by id; """ + sql "SET show_hidden_columns=true" + + sql "sync" + qt_partial_update_without_seq_hidden_columns """ + select * from ${tableName} order by id; + """ + + sql "SET show_hidden_columns=false" // provide the sequence column this time, should update according to the // given sequence values streamLoad { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
