This is an automated email from the ASF dual-hosted git repository.
zhangchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new bcef00df389 [fix](partial update) fix data correctness risk when load
delete sign data into a table with sequence col (#32574)
bcef00df389 is described below
commit bcef00df3895e1ea5a5ab0efa663fd07e3883ec7
Author: zhannngchen <[email protected]>
AuthorDate: Thu Mar 21 20:06:53 2024 +0800
[fix](partial update) fix data correctness risk when load delete sign data
into a table with sequence col (#32574)
---
be/src/olap/rowset/segment_v2/segment_writer.cpp | 12 +++++++-----
.../rowset/segment_v2/vertical_segment_writer.cpp | 12 +++++++-----
.../test_partial_update_seq_col_delete.out | Bin 1064 -> 1526 bytes
.../test_partial_update_seq_type_delete.out | Bin 3017 -> 2967 bytes
.../test_partial_update_seq_col_delete.groovy | 8 ++++++++
5 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index f99f7a8c9d0..6bbc3a50a74 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -533,10 +533,12 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
return st;
}
- // if the delete sign is marked, it means that the value columns of
the row
- // will not be read. So we don't need to read the missing values from
the previous rows.
- // But we still need to mark the previous row on delete bitmap
- if (have_delete_sign) {
+ // 1. if the delete sign is marked, it means that the value columns of
the row will not
+ // be read. So we don't need to read the missing values from the
previous rows.
+ // 2. the one exception is when there are sequence columns in the
table, we need to read
+ // the sequence columns, otherwise it may cause the merge-on-read
based compaction
+ // policy to produce incorrect results
+ if (have_delete_sign && !_tablet_schema->has_sequence_col()) {
has_default_or_nullable = true;
use_default_or_null_flag.emplace_back(true);
} else {
@@ -722,7 +724,7 @@ Status
SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f
(delete_sign_column_data != nullptr &&
delete_sign_column_data[read_index[idx + segment_start_pos]] !=
0)) {
for (auto i = 0; i < cids_missing.size(); ++i) {
- // if the column has default value, fiil it with default value
+ // if the column has default value, fill it with default value
// otherwise, if the column is nullable, fill it with null
value
const auto& tablet_column =
_tablet_schema->column(cids_missing[i]);
if (tablet_column.has_default_value()) {
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 4482884a371..4ff7eee5bde 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -464,10 +464,12 @@ Status
VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
return st;
}
- // if the delete sign is marked, it means that the value columns of
the row
- // will not be read. So we don't need to read the missing values from
the previous rows.
- // But we still need to mark the previous row on delete bitmap
- if (have_delete_sign) {
+ // 1. if the delete sign is marked, it means that the value columns of
the row will not
+ // be read. So we don't need to read the missing values from the
previous rows.
+ // 2. the one exception is when there are sequence columns in the
table, we need to read
+ // the sequence columns, otherwise it may cause the merge-on-read
based compaction
+ // policy to produce incorrect results
+ if (have_delete_sign && !_tablet_schema->has_sequence_col()) {
has_default_or_nullable = true;
use_default_or_null_flag.emplace_back(true);
} else {
@@ -648,7 +650,7 @@ Status VerticalSegmentWriter::_fill_missing_columns(
(delete_sign_column_data != nullptr &&
delete_sign_column_data[read_index[idx + segment_start_pos]] !=
0)) {
for (auto i = 0; i < missing_cids.size(); ++i) {
- // if the column has default value, fiil it with default value
+ // if the column has default value, fill it with default value
// otherwise, if the column is nullable, fill it with null
value
const auto& tablet_column =
_tablet_schema->column(missing_cids[i]);
if (tablet_column.has_default_value()) {
diff --git
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out
index a59dfbf5203..fe5746c41b7 100644
Binary files
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out
and
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.out
differ
diff --git
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out
index 80f580661fc..05f0c5dab4e 100644
Binary files
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out
and
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_seq_type_delete.out
differ
diff --git
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy
b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy
index c6cfdba1106..dca646948b4 100644
---
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy
+++
b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_seq_col_delete.groovy
@@ -80,6 +80,14 @@ suite("test_primary_key_partial_update_seq_col_delete",
"p0") {
select * from ${tableName} order by id;
"""
+ sql "SET show_hidden_columns=true"
+
+ sql "sync"
+ qt_partial_update_without_seq_hidden_columns """
+ select * from ${tableName} order by id;
+ """
+
+ sql "SET show_hidden_columns=false"
// provide the sequence column this time, should update according
to the
// given sequence values
streamLoad {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]