gavinchou commented on code in PR #54395:
URL: https://github.com/apache/doris/pull/54395#discussion_r2286792546


##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -813,6 +814,13 @@ Status CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
                     RETURN_IF_ERROR(tablet->merge_rowsets_schema());
                 }
             }
+
+            // Fill version holes
+            if (config::enable_fill_version_holes) {

Review Comment:
   这个开关和skip empty 应该是同一个?



##########
be/src/cloud/cloud_tablet.cpp:
##########
@@ -723,6 +723,10 @@ void CloudTablet::reset_approximate_stats(int64_t num_rowsets, int64_t num_segme
         if (v.second < cp) {
             continue;
         }
+        // Skip hole rowsets, which are not counted in the statistics
+        if (r->is_hole_rowset()) {

Review Comment:
   空的不应该跳过,也要算上的



##########
be/src/cloud/config.cpp:
##########
@@ -91,6 +91,9 @@ DEFINE_mInt32(meta_service_conflict_error_retry_times, "10");
 
 DEFINE_Bool(enable_check_storage_vault, "true");
 
+DEFINE_mBool(skip_writing_empty_rowset_metadata, "true");

Review Comment:
   默认false 比较好



##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -1712,5 +1720,121 @@ Status CloudMetaMgr::get_schema_dict(int64_t index_id,
     return Status::OK();
 }
 
+Status CloudMetaMgr::fill_version_holes(CloudTablet* tablet, int64_t max_version,

Review Comment:
   这两个新增的函数要加足够的单测来覆盖,应该是比较好加的,比系统测试方便



##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -1712,5 +1720,121 @@ Status CloudMetaMgr::get_schema_dict(int64_t index_id,
     return Status::OK();
 }
 
+Status CloudMetaMgr::fill_version_holes(CloudTablet* tablet, int64_t max_version,
+                                        std::unique_lock<std::shared_mutex>& wlock) {
+    if (max_version <= 0) {
+        return Status::OK();
+    }
+
+    Versions existing_versions;
+    for (const auto& rs : tablet->tablet_meta()->all_rs_metas()) {
+        existing_versions.emplace_back(rs->version());
+    }
+
+    // If there are no existing versions, it may be a new tablet for restore, so skip filling holes.
+    if (existing_versions.empty()) {
+        return Status::OK();
+    }
+
+    std::vector<RowsetSharedPtr> hole_rowsets;
+    // sort the existing versions in ascending order
+    std::sort(existing_versions.begin(), existing_versions.end(),
+              [](const Version& a, const Version& b) {
+                  // simple because 2 versions are certainly not overlapping
+                  return a.first < b.first;
+              });
+
+    int64_t last_version = -1;
+    for (const Version& version : existing_versions) {
+        // missing versions are those that are not in the existing_versions
+        if (version.first > last_version + 1) {
+            // there is a hole between versions
+            auto prev_non_hole_rowset = tablet->get_rowset_by_version(version);
+            for (int64_t ver = last_version + 1; ver < version.first; ++ver) {
+                RowsetSharedPtr hole_rowset;
+                RETURN_IF_ERROR(create_empty_rowset_for_hole(
+                        tablet, ver, prev_non_hole_rowset->rowset_meta(), &hole_rowset));
+                hole_rowsets.push_back(hole_rowset);
+            }
+            LOG(INFO) << "Created empty rowset for version hole, from " << last_version + 1
+                      << " to " << version.first - 1 << " for tablet " << tablet->tablet_id();
+        }
+        last_version = version.second;
+    }
+
+    if (last_version + 1 <= max_version) {
+        LOG(INFO) << "Created empty rowset for version hole, from " << last_version + 1 << " to "
+                  << max_version << " for tablet " << tablet->tablet_id();
+        for (; last_version + 1 <= max_version; ++last_version) {
+            RowsetSharedPtr hole_rowset;
+            auto prev_non_hole_rowset = tablet->get_rowset_by_version(existing_versions.back());
+            RETURN_IF_ERROR(create_empty_rowset_for_hole(
+                    tablet, last_version + 1, prev_non_hole_rowset->rowset_meta(), &hole_rowset));
+            hole_rowsets.push_back(hole_rowset);
+        }
+    }
+
+    if (!hole_rowsets.empty()) {
+        size_t hole_count = hole_rowsets.size();
+        tablet->add_rowsets(std::move(hole_rowsets), false, wlock, false);
+        g_cloud_version_hole_filled_count << hole_count;
+    }
+    return Status::OK();
+}
+
+Status CloudMetaMgr::create_empty_rowset_for_hole(CloudTablet* tablet, int64_t version,
+                                                  RowsetMetaSharedPtr prev_rowset_meta,
+                                                  RowsetSharedPtr* rowset) {
+    // Create a RowsetMeta for the empty rowset
+    auto rs_meta = std::make_shared<RowsetMeta>();
+
+    // Generate a deterministic rowset ID for the hole (same tablet_id + version = same rowset_id)
+    RowsetId hole_rowset_id;
+    hole_rowset_id.init(2, 0, tablet->tablet_id(), version);

Review Comment:
   这个id可以一眼看出来是补的吗?比如弄个个0300000开头的



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to