This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 91675d0ef67 [branch-2.0](cherry-pick) compaction may cause update
failure(#31551) (#31556)
91675d0ef67 is described below
commit 91675d0ef67c3cbddbb8caf9e219af93d27c0ac1
Author: zhannngchen <[email protected]>
AuthorDate: Thu Mar 7 11:36:11 2024 +0800
[branch-2.0](cherry-pick) compaction may cause update failure(#31551)
(#31556)
---
be/src/olap/rowset/segment_v2/segment_writer.cpp | 18 +++++++++++++++++-
be/src/olap/tablet.cpp | 10 +++++++++-
be/src/olap/tablet.h | 4 ++--
3 files changed, 28 insertions(+), 4 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index a777e358fc3..c1c124658b7 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -400,7 +400,23 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
std::vector<RowsetSharedPtr> specified_rowsets;
{
std::shared_lock rlock(_tablet->get_header_lock());
- specified_rowsets =
_tablet->get_rowset_by_ids(&_mow_context->rowset_ids);
+ // Under normal circumstances, `get_rowset_by_ids` does not need to
consider the stale
+ // rowset, in other words, if a rowset id is not found in the normal
rowset, we can ignore
+ // it. This is because even if we handle stale rowset here, we need to
recalculate the
+ // new rowset generated by the corresponding compaction in the publish
phase.
+ // However, for partial update, ignoring the stale rowset may cause
some keys to not be
+ // found in the flush phase (lookup_row_key returns KEY_NOT_FOUND),
and thus be mistaken
+ // as new keys in the flush phase, which will cause the load to fail
in the following
+ // two cases:
+ // 1. when strict_mode is enabled, new keys are not allowed to be
added.
+ // 2. Some columns that need to be filled are neither nullable nor
have a default value,
+ // in which case the value of the field cannot be filled as a new
key, leading to a
+ // failure of the load.
+ bool should_include_stale =
+ _opts.rowset_ctx->partial_update_info->is_strict_mode ||
+
!_opts.rowset_ctx->partial_update_info->can_insert_new_rows_in_partial_update;
+ specified_rowsets =
+ _tablet->get_rowset_by_ids(&_mow_context->rowset_ids,
should_include_stale);
if (_opts.rowset_ctx->partial_update_info->is_strict_mode &&
specified_rowsets.size() != _mow_context->rowset_ids.size()) {
// Only when this is a strict mode partial update that missing
rowsets here will lead to problems.
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 47e089add41..0b92c109901 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -3193,7 +3193,7 @@ Status Tablet::calc_delete_bitmap(RowsetSharedPtr rowset,
}
std::vector<RowsetSharedPtr> Tablet::get_rowset_by_ids(
- const RowsetIdUnorderedSet* specified_rowset_ids) {
+ const RowsetIdUnorderedSet* specified_rowset_ids, bool include_stale) {
std::vector<RowsetSharedPtr> rowsets;
for (auto& rs : _rs_version_map) {
if (!specified_rowset_ids ||
@@ -3201,6 +3201,14 @@ std::vector<RowsetSharedPtr> Tablet::get_rowset_by_ids(
rowsets.push_back(rs.second);
}
}
+ if (include_stale && specified_rowset_ids != nullptr &&
+ rowsets.size() != specified_rowset_ids->size()) {
+ for (auto& rs : _stale_rs_version_map) {
+ if (specified_rowset_ids->find(rs.second->rowset_id()) !=
specified_rowset_ids->end()) {
+ rowsets.push_back(rs.second);
+ }
+ }
+ }
std::sort(rowsets.begin(), rowsets.end(), [](RowsetSharedPtr& lhs,
RowsetSharedPtr& rhs) {
return lhs->end_version() > rhs->end_version();
});
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 0e33bf71030..d94990e4071 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -459,8 +459,8 @@ public:
DeleteBitmapPtr delete_bitmap, int64_t version,
CalcDeleteBitmapToken* token, RowsetWriter*
rowset_writer = nullptr);
- std::vector<RowsetSharedPtr> get_rowset_by_ids(
- const RowsetIdUnorderedSet* specified_rowset_ids);
+ std::vector<RowsetSharedPtr> get_rowset_by_ids(const RowsetIdUnorderedSet*
specified_rowset_ids,
+ bool include_stale = false);
Status calc_segment_delete_bitmap(RowsetSharedPtr rowset,
const segment_v2::SegmentSharedPtr& seg,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]