This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new bd8ef4edeb [fix](cooldown) Fix core in remove_all_remote_rowsets 
(#16374)
bd8ef4edeb is described below

commit bd8ef4edebe4186173f16abe620b749c1a0bc472
Author: plat1ko <[email protected]>
AuthorDate: Sat Feb 4 22:31:38 2023 +0800

    [fix](cooldown) Fix core in remove_all_remote_rowsets (#16374)
---
 be/src/olap/data_dir.cpp                         | 46 ++++++++++++++++--------
 be/src/olap/tablet.cpp                           | 25 +++++++++----
 be/src/olap/tablet_meta.cpp                      |  5 +--
 be/test/olap/test_data/header_without_inc_rs.txt |  3 +-
 gensrc/proto/olap_file.proto                     |  7 +++-
 5 files changed, 59 insertions(+), 27 deletions(-)

diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index 4b2d1af949..f1c5b65110 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -837,22 +837,28 @@ void DataDir::perform_remote_rowset_gc() {
     for (auto& [key, val] : gc_kvs) {
         auto rowset_id = key.substr(REMOTE_ROWSET_GC_PREFIX.size());
         RemoteRowsetGcPB gc_pb;
-        gc_pb.ParseFromString(val);
+        if (!gc_pb.ParseFromString(val)) {
+            LOG(WARNING) << "malformed RemoteRowsetGcPB. rowset_id=" << 
rowset_id;
+            deleted_keys.push_back(std::move(key));
+            continue;
+        }
         auto fs = get_filesystem(gc_pb.resource_id());
         if (!fs) {
             LOG(WARNING) << "Cannot get file system: " << gc_pb.resource_id();
             continue;
         }
         DCHECK(fs->type() != io::FileSystemType::LOCAL);
-        Status st;
+        bool success = true;
         for (int i = 0; i < gc_pb.num_segments(); ++i) {
             auto seg_path = BetaRowset::remote_segment_path(gc_pb.tablet_id(), 
rowset_id, i);
-            st = fs->delete_file(seg_path);
+            // TODO(plat1ko): batch delete
+            auto st = fs->delete_file(seg_path);
             if (!st.ok()) {
-                LOG(WARNING) << "failed to perform remote rowset gc, err=" << 
st;
+                LOG(WARNING) << "failed to delete remote rowset. err=" << st;
+                success = false;
             }
         }
-        if (st.ok()) {
+        if (success) {
             deleted_keys.push_back(std::move(key));
         }
     }
@@ -870,20 +876,30 @@ void DataDir::perform_remote_tablet_gc() {
     };
     _meta->iterate(META_COLUMN_FAMILY_INDEX, REMOTE_TABLET_GC_PREFIX, 
traverse_remote_tablet_func);
     std::vector<std::string> deleted_keys;
-    for (auto& [key, resource] : tablet_gc_kvs) {
+    for (auto& [key, val] : tablet_gc_kvs) {
         auto tablet_id = key.substr(REMOTE_TABLET_GC_PREFIX.size());
-        auto fs = get_filesystem(resource);
-        if (!fs) {
-            LOG(WARNING) << "could not get file system. resource_id=" << 
resource;
+        RemoteTabletGcPB gc_pb;
+        if (!gc_pb.ParseFromString(val)) {
+            LOG(WARNING) << "malformed RemoteTabletGcPB. tablet_id=" << 
tablet_id;
+            deleted_keys.push_back(std::move(key));
             continue;
         }
-        auto st = fs->delete_directory(DATA_PREFIX + '/' + tablet_id);
-        LOG(INFO) << "perform remote tablet gc. tablet_id=" << tablet_id;
-        if (st.ok()) {
+        bool success = true;
+        for (auto& resource_id : gc_pb.resource_ids()) {
+            auto fs = get_filesystem(resource_id);
+            if (!fs) {
+                LOG(WARNING) << "could not get file system. resource_id=" << 
resource_id;
+                success = false;
+                continue;
+            }
+            auto st = fs->delete_directory(DATA_PREFIX + '/' + tablet_id);
+            if (!st.ok()) {
+                LOG(WARNING) << "failed to delete all remote rowset in tablet. 
err=" << st;
+                success = false;
+            }
+        }
+        if (success) {
             deleted_keys.push_back(std::move(key));
-        } else {
-            LOG(WARNING) << "failed to perform remote tablet gc. tablet_id=" 
<< tablet_id
-                         << ", reason: " << st;
         }
     }
     for (const auto& key : deleted_keys) {
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 8a377b4c99..8f385b1170 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1927,21 +1927,32 @@ void Tablet::record_unused_remote_rowset(const 
RowsetId& rowset_id, const std::s
     gc_pb.set_resource_id(resource);
     gc_pb.set_tablet_id(tablet_id());
     gc_pb.set_num_segments(num_segments);
-    WARN_IF_ERROR(
-            _data_dir->get_meta()->put(META_COLUMN_FAMILY_INDEX, gc_key, 
gc_pb.SerializeAsString()),
-            fmt::format("Failed to record unused remote rowset(tablet id: {}, 
rowset id: {})",
-                        tablet_id(), rowset_id.to_string()));
+    auto st =
+            _data_dir->get_meta()->put(META_COLUMN_FAMILY_INDEX, gc_key, 
gc_pb.SerializeAsString());
+    if (!st.ok()) {
+        LOG(WARNING) << "failed to record unused remote rowset. tablet_id=" << 
tablet_id()
+                     << " rowset_id=" << rowset_id << " resource_id=" << 
resource;
+    }
 }
 
 Status Tablet::remove_all_remote_rowsets() {
     DCHECK(_state == TABLET_SHUTDOWN);
-    if (storage_policy_id() == 0) {
+    std::set<std::string> resource_ids;
+    for (auto& rs_meta : _tablet_meta->all_rs_metas()) {
+        if (!rs_meta->is_local()) {
+            resource_ids.insert(rs_meta->resource_id());
+        }
+    }
+    if (resource_ids.empty()) {
         return Status::OK();
     }
     auto tablet_gc_key = REMOTE_TABLET_GC_PREFIX + std::to_string(tablet_id());
-    auto policy = get_storage_policy(storage_policy_id());
+    RemoteTabletGcPB gc_pb;
+    for (auto& resource_id : resource_ids) {
+        gc_pb.add_resource_ids(resource_id);
+    }
     return _data_dir->get_meta()->put(META_COLUMN_FAMILY_INDEX, tablet_gc_key,
-                                      std::to_string(policy->resource_id));
+                                      gc_pb.SerializeAsString());
 }
 
 TabletSchemaSPtr Tablet::tablet_schema() const {
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index da6a5b1c29..756835f1b7 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -588,8 +588,9 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) {
     if (_preferred_rowset_type == BETA_ROWSET) {
         tablet_meta_pb->set_preferred_rowset_type(_preferred_rowset_type);
     }
-
-    tablet_meta_pb->set_storage_policy_id(_storage_policy_id);
+    if (_storage_policy_id > 0) {
+        tablet_meta_pb->set_storage_policy_id(_storage_policy_id);
+    }
     
tablet_meta_pb->set_enable_unique_key_merge_on_write(_enable_unique_key_merge_on_write);
 
     if (_enable_unique_key_merge_on_write) {
diff --git a/be/test/olap/test_data/header_without_inc_rs.txt 
b/be/test/olap/test_data/header_without_inc_rs.txt
index 88b0422354..1aa0121ee8 100644
--- a/be/test/olap/test_data/header_without_inc_rs.txt
+++ b/be/test/olap/test_data/header_without_inc_rs.txt
@@ -106,6 +106,5 @@
     "preferred_rowset_type": "BETA_ROWSET",
     "tablet_type": "TABLET_TYPE_DISK",
     "replica_id": 0,
-    "enable_unique_key_merge_on_write": false,
-    "storage_policy_id": 0
+    "enable_unique_key_merge_on_write": false
 }
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 7fad92e321..4452373335 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -115,13 +115,18 @@ message RowsetMetaPB {
     optional SegmentsOverlapPB segments_overlap_pb = 51 [default = 
OVERLAP_UNKNOWN];
 }
 
-// unused remote rowsets garbage collection kv value
+// kv value for reclaiming remote rowset
 message RemoteRowsetGcPB {
     required string resource_id = 1;
     required int64 tablet_id = 2;
     required int64 num_segments = 3;
 }
 
+// kv value for reclaiming all remote rowsets of tablet
+message RemoteTabletGcPB {
+    repeated string resource_ids = 1;
+}
+
 enum DataFileType {
     OLAP_DATA_FILE = 0; //Deprecated. Only columnar-wise format is supported.
     COLUMN_ORIENTED_FILE = 1;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to