This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new f277decdab6 branch-4.1: [fix](cloud) Fill schema change version holes before running #63443 (#63463)
f277decdab6 is described below
commit f277decdab6a8983b628667744093a7435b6712e
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu May 21 19:19:03 2026 +0800
branch-4.1: [fix](cloud) Fill schema change version holes before running #63443 (#63463)
Cherry-picked from #63443
Co-authored-by: Xin Liao <[email protected]>
---
be/src/cloud/cloud_schema_change_job.cpp | 4 +
be/test/cloud/cloud_schema_change_job_test.cpp | 109 +++++++++++++++++++++++++
be/test/cloud/cloud_tablet_test.cpp | 50 ++++++++++++
3 files changed, 163 insertions(+)
diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp
index eb706a70d17..3536d831db7 100644
--- a/be/src/cloud/cloud_schema_change_job.cpp
+++ b/be/src/cloud/cloud_schema_change_job.cpp
@@ -564,6 +564,10 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam
}
}
_new_tablet->add_rowsets(std::move(_output_rowsets), true, wlock,
false);
+ // Ensure the real new tablet has a continuous local version graph before it becomes
+ // visible. Later RUNNING-tablet delete bitmap sync depends on capturing all old versions.
+ RETURN_IF_ERROR(_cloud_storage_engine.meta_mgr().fill_version_holes(
+ _new_tablet.get(), _new_tablet->max_version_unlocked(),
wlock));
_new_tablet->set_cumulative_layer_point(_output_cumulative_point);
_new_tablet->reset_approximate_stats(stats.num_rowsets(),
stats.num_segments(),
stats.num_rows(),
stats.data_size());
diff --git a/be/test/cloud/cloud_schema_change_job_test.cpp b/be/test/cloud/cloud_schema_change_job_test.cpp
index 82cfe92edac..972ff2af255 100644
--- a/be/test/cloud/cloud_schema_change_job_test.cpp
+++ b/be/test/cloud/cloud_schema_change_job_test.cpp
@@ -22,6 +22,7 @@
#include <gtest/gtest.h>
#include <memory>
+#include <vector>
#include "cloud/cloud_cluster_info.h"
#include "cloud/cloud_storage_engine.h"
@@ -96,6 +97,114 @@ protected:
std::shared_ptr<CloudClusterInfo> _cluster_info;
};
+TEST_F(CloudSchemaChangeJobTest, FillVersionHolesBeforeNewTabletRunning) {
+ int64_t base_tablet_id = 40001;
+ int64_t new_tablet_id = 40002;
+
+ TabletMetaSharedPtr base_meta(new TabletMeta(
+ 1, 2, base_tablet_id, base_tablet_id + 100, 4, 5, TTabletSchema(),
6, {{7, 8}},
+ UniqueId(9, 10), TTabletType::TABLET_TYPE_DISK,
TCompressionType::LZ4F));
+ TabletMetaSharedPtr new_meta(new TabletMeta(
+ 1, 2, new_tablet_id, new_tablet_id + 100, 4, 5, TTabletSchema(),
6, {{7, 8}},
+ UniqueId(11, 12), TTabletType::TABLET_TYPE_DISK,
TCompressionType::LZ4F));
+
+ auto base_tablet = std::make_shared<CloudTablet>(_engine,
std::move(base_meta));
+ auto new_tablet = std::make_shared<CloudTablet>(_engine,
std::move(new_meta));
+ static_cast<void>(new_tablet->set_tablet_state(TABLET_NOTREADY));
+
+ auto placeholder = create_rowset(new_tablet->tablet_schema(),
new_tablet_id, 0, 1);
+ auto rowset_after_hole = create_rowset(new_tablet->tablet_schema(),
new_tablet_id, 4, 4);
+ ASSERT_NE(placeholder, nullptr);
+ ASSERT_NE(rowset_after_hole, nullptr);
+
+ auto* sp = SyncPoint::get_instance();
+ sp->clear_all_call_backs();
+ sp->enable_processing();
+
+ sp->set_call_back("CloudMetaMgr::get_tablet_meta", [&](auto&& args) {
+ auto tablet_id = try_any_cast<int64_t>(args[0]);
+ auto* meta_ptr = try_any_cast<TabletMetaSharedPtr*>(args[1]);
+ if (tablet_id == base_tablet_id) {
+ *meta_ptr = base_tablet->tablet_meta();
+ } else if (tablet_id == new_tablet_id) {
+ *meta_ptr = new_tablet->tablet_meta();
+ }
+ try_any_cast_ret<Status>(args)->second = true;
+ });
+
+ CloudTablet* loaded_new_tablet = nullptr;
+ sp->set_call_back("CloudMetaMgr::sync_tablet_rowsets", [&](auto&& outcome)
{
+ auto* tablet = try_any_cast<CloudTablet*>(outcome[0]);
+ if (tablet->tablet_id() == new_tablet_id) {
+ loaded_new_tablet = tablet;
+ std::unique_lock lock(tablet->get_header_lock());
+ std::vector<RowsetSharedPtr> rowsets;
+ if (!tablet->rowset_map().count(Version(0, 1))) {
+ rowsets.push_back(placeholder);
+ }
+ if (!tablet->rowset_map().count(Version(4, 4))) {
+ rowsets.push_back(rowset_after_hole);
+ }
+ tablet->add_rowsets(std::move(rowsets), false, lock, false);
+ }
+ auto* pairs = try_any_cast_ret<Status>(outcome);
+ pairs->second = true;
+ pairs->first = Status::OK();
+ });
+
+ sp->set_call_back("CloudMetaMgr::prepare_tablet_job", [](auto&& outcome) {
+ auto* pairs = try_any_cast_ret<Status>(outcome);
+ pairs->second = true;
+ pairs->first = Status::OK();
+
+ auto* resp = try_any_cast<cloud::StartTabletJobResponse*>(outcome[1]);
+ resp->mutable_status()->set_code(cloud::MetaServiceCode::OK);
+ resp->set_alter_version(2);
+ });
+
+ bool commit_called = false;
+ sp->set_call_back("CloudMetaMgr::commit_tablet_job", [&](auto&& outcome) {
+ commit_called = true;
+ auto* pairs = try_any_cast_ret<Status>(outcome);
+ pairs->second = true;
+ pairs->first = Status::OK();
+
+ auto* resp = try_any_cast<cloud::FinishTabletJobResponse*>(outcome[1]);
+ resp->mutable_status()->set_code(cloud::MetaServiceCode::OK);
+ auto* stats = resp->mutable_stats();
+ stats->set_num_rowsets(3);
+ stats->set_num_segments(0);
+ stats->set_num_rows(0);
+ stats->set_data_size(0);
+ });
+
+ TAlterTabletReqV2 request;
+ request.base_tablet_id = base_tablet_id;
+ request.new_tablet_id = new_tablet_id;
+ request.alter_version = 1;
+ request.__set_alter_tablet_type(TAlterTabletType::SCHEMA_CHANGE);
+
+ CloudSchemaChangeJob sc_job(_engine, "test_fill_holes_before_running",
9999999999);
+ auto status = sc_job.process_alter_tablet(request);
+
+ ASSERT_TRUE(status.ok()) << status.to_string();
+ ASSERT_TRUE(commit_called);
+ ASSERT_NE(loaded_new_tablet, nullptr);
+ ASSERT_EQ(loaded_new_tablet->tablet_state(), TABLET_RUNNING);
+ ASSERT_TRUE(loaded_new_tablet->rowset_map().count(Version(3, 3)));
+ auto hole_rowset = loaded_new_tablet->rowset_map().at(Version(3, 3));
+ ASSERT_TRUE(hole_rowset->empty());
+ ASSERT_TRUE(hole_rowset->is_hole_rowset());
+
+ auto versions_result =
+ loaded_new_tablet->capture_consistent_versions_unlocked(Version(3,
4), {});
+ ASSERT_TRUE(versions_result.has_value()) << versions_result.error();
+ const auto& versions = versions_result.value();
+ ASSERT_EQ(versions.size(), 2);
+ ASSERT_EQ(versions[0], Version(3, 3));
+ ASSERT_EQ(versions[1], Version(4, 4));
+}
+
// Test: cross-V1 compaction detected → abort SC job → return SC_COMPACTION_CONFLICT
TEST_F(CloudSchemaChangeJobTest, CrossV1CompactionDetected) {
int64_t base_tablet_id = 10001;
diff --git a/be/test/cloud/cloud_tablet_test.cpp b/be/test/cloud/cloud_tablet_test.cpp
index d2f201b14d4..3c66f227ab3 100644
--- a/be/test/cloud/cloud_tablet_test.cpp
+++ b/be/test/cloud/cloud_tablet_test.cpp
@@ -26,6 +26,7 @@
#include <cstdint>
#include <ranges>
+#include "cloud/cloud_meta_mgr.h"
#include "cloud/cloud_storage_engine.h"
#include "cloud/cloud_warm_up_manager.h"
#include "common/config.h"
@@ -1379,6 +1380,7 @@ public:
rs_meta->set_rowset_type(BETA_ROWSET);
rs_meta->set_version(version);
rs_meta->set_rowset_id(_engine.next_rowset_id());
+ rs_meta->set_tablet_schema(_tablet->tablet_schema());
RowsetSharedPtr rowset;
Status st = RowsetFactory::create_rowset(nullptr, "", rs_meta,
&rowset);
if (!st.ok()) {
@@ -1546,6 +1548,54 @@ TEST_F(CloudTabletDeleteRowsetsForSchemaChangeTest, TestMultipleCompactionRowset
ASSERT_EQ(versions[10], Version(11, 11));
}
+TEST_F(CloudTabletDeleteRowsetsForSchemaChangeTest,
TestFillVersionHolesBeforeSchemaChangeRunning) {
+ static_cast<void>(_tablet->set_tablet_state(TABLET_NOTREADY));
+ _tablet->set_alter_version(10);
+
+ auto rs_placeholder = create_rowset(Version(0, 1));
+ auto rs_historical = create_rowset(Version(2, 10));
+ auto rs_after_first_hole = create_rowset(Version(12, 12));
+ auto rs_after_second_hole = create_rowset(Version(14, 14));
+ ASSERT_NE(rs_placeholder, nullptr);
+ ASSERT_NE(rs_historical, nullptr);
+ ASSERT_NE(rs_after_first_hole, nullptr);
+ ASSERT_NE(rs_after_second_hole, nullptr);
+
+ cloud::CloudMetaMgr meta_mgr;
+ {
+ std::unique_lock wlock(_tablet->get_header_lock());
+ _tablet->add_rowsets(
+ {rs_placeholder, rs_historical, rs_after_first_hole,
rs_after_second_hole}, false,
+ wlock, false);
+ ASSERT_FALSE(_tablet->rowset_map().count(Version(11, 11)));
+ ASSERT_FALSE(_tablet->rowset_map().count(Version(13, 13)));
+ ASSERT_FALSE(_tablet->capture_consistent_versions_unlocked(Version(0,
14), {}).has_value());
+
+ auto status =
+ meta_mgr.fill_version_holes(_tablet.get(),
_tablet->max_version_unlocked(), wlock);
+ ASSERT_TRUE(status.ok()) << status.to_string();
+ ASSERT_TRUE(_tablet->set_tablet_state(TABLET_RUNNING).ok());
+ }
+
+ for (const Version& version : {Version(11, 11), Version(13, 13)}) {
+ ASSERT_TRUE(_tablet->rowset_map().count(version));
+ auto hole_rowset = _tablet->rowset_map().at(version);
+ ASSERT_TRUE(hole_rowset->empty());
+ ASSERT_TRUE(hole_rowset->is_hole_rowset());
+ }
+
+ auto versions_result =
_tablet->capture_consistent_versions_unlocked(Version(0, 14), {});
+ ASSERT_TRUE(versions_result.has_value()) << versions_result.error();
+ const auto& versions = versions_result.value();
+ ASSERT_EQ(versions.size(), 6);
+ ASSERT_EQ(versions[0], Version(0, 1));
+ ASSERT_EQ(versions[1], Version(2, 10));
+ ASSERT_EQ(versions[2], Version(11, 11));
+ ASSERT_EQ(versions[3], Version(12, 12));
+ ASSERT_EQ(versions[4], Version(13, 13));
+ ASSERT_EQ(versions[5], Version(14, 14));
+}
+
// Reproduce the CI crash scenario: SC delete puts rowsets to stale, then
// compaction creates a new stale path with overlapping version keys. When
// one stale path is cleaned, the other hits DCHECK(false) because the
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]