This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit d3bdda6071d66b576b49d85bf2ba26f24e6c1524
Author: zhannngchen <[email protected]>
AuthorDate: Thu Mar 21 20:06:53 2024 +0800

    [fix](partial update) fix data correctness risk when load delete sign data 
into a table with sequence col (#32574)
---
 be/src/olap/rowset/segment_v2/segment_writer.cpp      |  12 +++++++-----
 .../rowset/segment_v2/vertical_segment_writer.cpp     |  12 +++++++-----
 .../test_partial_update_seq_col_delete.out            | Bin 1064 -> 1526 bytes
 .../test_partial_update_seq_type_delete.out           | Bin 3017 -> 2967 bytes
 .../test_partial_update_seq_col_delete.groovy         |   8 ++++++++
 5 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 990fed3f285..6228e781f79 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -537,10 +537,12 @@ Status 
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
             return st;
         }
 
-        // if the delete sign is marked, it means that the value columns of 
the row
-        // will not be read. So we don't need to read the missing values from 
the previous rows.
-        // But we still need to mark the previous row on delete bitmap
-        if (have_delete_sign) {
+        // 1. if the delete sign is marked, it means that the value columns of 
the row will not
+        //    be read. So we don't need to read the missing values from the 
previous rows.
+        // 2. the one exception is when there are sequence columns in the 
table, we need to read
+        //    the sequence columns, otherwise it may cause the merge-on-read 
based compaction
+        //    policy to produce incorrect results
+        if (have_delete_sign && !_tablet_schema->has_sequence_col()) {
             has_default_or_nullable = true;
             use_default_or_null_flag.emplace_back(true);
         } else {
@@ -726,7 +728,7 @@ Status 
SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f
             (delete_sign_column_data != nullptr &&
              delete_sign_column_data[read_index[idx + segment_start_pos]] != 
0)) {
             for (auto i = 0; i < cids_missing.size(); ++i) {
-                // if the column has default value, fiil it with default value
+                // if the column has default value, fill it with default value
                 // otherwise, if the column is nullable, fill it with null 
value
                 const auto& tablet_column = 
_tablet_schema->column(cids_missing[i]);
                 if (tablet_column.has_default_value()) {
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index f60c2eb2593..4b48dd959a6 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -468,10 +468,12 @@ Status 
VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
             return st;
         }
 
-        // if the delete sign is marked, it means that the value columns of 
the row
-        // will not be read. So we don't need to read the missing values from 
the previous rows.
-        // But we still need to mark the previous row on delete bitmap
-        if (have_delete_sign) {
+        // 1. if the delete sign is marked, it means that the value columns of 
the row will not
+        //    be read. So we don't need to read the missing values from the 
previous rows.
+        // 2. the one exception is when there are sequence columns in the 
table, we need to read
+        //    the sequence columns, otherwise it may cause the merge-on-read 
based compaction
+        //    policy to produce incorrect results
+        if (have_delete_sign && !_tablet_schema->has_sequence_col()) {
             has_default_or_nullable = true;
             use_default_or_null_flag.emplace_back(true);
         } else {
@@ -656,7 +658,7 @@ Status VerticalSegmentWriter::_fill_missing_columns(
             (delete_sign_column_data != nullptr &&
              delete_sign_column_data[read_index[idx + segment_start_pos]] != 
0)) {
             for (auto i = 0; i < missing_cids.size(); ++i) {
-                // if the column has default value, fiil it with default value
+                // if the column has default value, fill it with default value
                 // otherwise, if the column is nullable, fill it with null 
value
                 const auto& tablet_column = 
_tablet_schema->column(missing_cids[i]);
                 if (tablet_column.has_default_value()) {
diff --git 
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out
 
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out
index a59dfbf5203..fe5746c41b7 100644
Binary files 
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out
 and 
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out
 differ
diff --git 
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out
 
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out
index 80f580661fc..05f0c5dab4e 100644
Binary files 
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out
 and 
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out
 differ
diff --git 
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy
 
b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy
index c6cfdba1106..dca646948b4 100644
--- 
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy
+++ 
b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy
@@ -80,6 +80,14 @@ suite("test_primary_key_partial_update_seq_col_delete", 
"p0") {
                 select * from ${tableName} order by id;
             """
 
+            sql "SET show_hidden_columns=true"
+
+            sql "sync"
+            qt_partial_update_without_seq_hidden_columns """
+                select * from ${tableName} order by id;
+            """
+
+            sql "SET show_hidden_columns=false"
             // provide the sequence column this time, should update according 
to the
             // given sequence values
             streamLoad {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to