This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new bd8ef4edeb [fix](cooldown) Fix core in remove_all_remote_rowsets
(#16374)
bd8ef4edeb is described below
commit bd8ef4edebe4186173f16abe620b749c1a0bc472
Author: plat1ko <[email protected]>
AuthorDate: Sat Feb 4 22:31:38 2023 +0800
[fix](cooldown) Fix core in remove_all_remote_rowsets (#16374)
---
be/src/olap/data_dir.cpp | 46 ++++++++++++++++--------
be/src/olap/tablet.cpp | 25 +++++++++----
be/src/olap/tablet_meta.cpp | 5 +--
be/test/olap/test_data/header_without_inc_rs.txt | 3 +-
gensrc/proto/olap_file.proto | 7 +++-
5 files changed, 59 insertions(+), 27 deletions(-)
diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index 4b2d1af949..f1c5b65110 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -837,22 +837,28 @@ void DataDir::perform_remote_rowset_gc() {
for (auto& [key, val] : gc_kvs) {
auto rowset_id = key.substr(REMOTE_ROWSET_GC_PREFIX.size());
RemoteRowsetGcPB gc_pb;
- gc_pb.ParseFromString(val);
+ if (!gc_pb.ParseFromString(val)) {
+ LOG(WARNING) << "malformed RemoteRowsetGcPB. rowset_id=" <<
rowset_id;
+ deleted_keys.push_back(std::move(key));
+ continue;
+ }
auto fs = get_filesystem(gc_pb.resource_id());
if (!fs) {
LOG(WARNING) << "Cannot get file system: " << gc_pb.resource_id();
continue;
}
DCHECK(fs->type() != io::FileSystemType::LOCAL);
- Status st;
+ bool success = true;
for (int i = 0; i < gc_pb.num_segments(); ++i) {
auto seg_path = BetaRowset::remote_segment_path(gc_pb.tablet_id(),
rowset_id, i);
- st = fs->delete_file(seg_path);
+ // TODO(plat1ko): batch delete
+ auto st = fs->delete_file(seg_path);
if (!st.ok()) {
- LOG(WARNING) << "failed to perform remote rowset gc, err=" <<
st;
+ LOG(WARNING) << "failed to delete remote rowset. err=" << st;
+ success = false;
}
}
- if (st.ok()) {
+ if (success) {
deleted_keys.push_back(std::move(key));
}
}
@@ -870,20 +876,30 @@ void DataDir::perform_remote_tablet_gc() {
};
_meta->iterate(META_COLUMN_FAMILY_INDEX, REMOTE_TABLET_GC_PREFIX,
traverse_remote_tablet_func);
std::vector<std::string> deleted_keys;
- for (auto& [key, resource] : tablet_gc_kvs) {
+ for (auto& [key, val] : tablet_gc_kvs) {
auto tablet_id = key.substr(REMOTE_TABLET_GC_PREFIX.size());
- auto fs = get_filesystem(resource);
- if (!fs) {
- LOG(WARNING) << "could not get file system. resource_id=" <<
resource;
+ RemoteTabletGcPB gc_pb;
+ if (!gc_pb.ParseFromString(val)) {
+ LOG(WARNING) << "malformed RemoteTabletGcPB. tablet_id=" <<
tablet_id;
+ deleted_keys.push_back(std::move(key));
continue;
}
- auto st = fs->delete_directory(DATA_PREFIX + '/' + tablet_id);
- LOG(INFO) << "perform remote tablet gc. tablet_id=" << tablet_id;
- if (st.ok()) {
+ bool success = true;
+ for (auto& resource_id : gc_pb.resource_ids()) {
+ auto fs = get_filesystem(resource_id);
+ if (!fs) {
+ LOG(WARNING) << "could not get file system. resource_id=" <<
resource_id;
+ success = false;
+ continue;
+ }
+ auto st = fs->delete_directory(DATA_PREFIX + '/' + tablet_id);
+ if (!st.ok()) {
+ LOG(WARNING) << "failed to delete all remote rowset in tablet.
err=" << st;
+ success = false;
+ }
+ }
+ if (success) {
deleted_keys.push_back(std::move(key));
- } else {
- LOG(WARNING) << "failed to perform remote tablet gc. tablet_id="
<< tablet_id
- << ", reason: " << st;
}
}
for (const auto& key : deleted_keys) {
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 8a377b4c99..8f385b1170 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1927,21 +1927,32 @@ void Tablet::record_unused_remote_rowset(const
RowsetId& rowset_id, const std::s
gc_pb.set_resource_id(resource);
gc_pb.set_tablet_id(tablet_id());
gc_pb.set_num_segments(num_segments);
- WARN_IF_ERROR(
- _data_dir->get_meta()->put(META_COLUMN_FAMILY_INDEX, gc_key,
gc_pb.SerializeAsString()),
- fmt::format("Failed to record unused remote rowset(tablet id: {},
rowset id: {})",
- tablet_id(), rowset_id.to_string()));
+ auto st =
+ _data_dir->get_meta()->put(META_COLUMN_FAMILY_INDEX, gc_key,
gc_pb.SerializeAsString());
+ if (!st.ok()) {
+ LOG(WARNING) << "failed to record unused remote rowset. tablet_id=" <<
tablet_id()
+ << " rowset_id=" << rowset_id << " resource_id=" <<
resource;
+ }
}
Status Tablet::remove_all_remote_rowsets() {
DCHECK(_state == TABLET_SHUTDOWN);
- if (storage_policy_id() == 0) {
+ std::set<std::string> resource_ids;
+ for (auto& rs_meta : _tablet_meta->all_rs_metas()) {
+ if (!rs_meta->is_local()) {
+ resource_ids.insert(rs_meta->resource_id());
+ }
+ }
+ if (resource_ids.empty()) {
return Status::OK();
}
auto tablet_gc_key = REMOTE_TABLET_GC_PREFIX + std::to_string(tablet_id());
- auto policy = get_storage_policy(storage_policy_id());
+ RemoteTabletGcPB gc_pb;
+ for (auto& resource_id : resource_ids) {
+ gc_pb.add_resource_ids(resource_id);
+ }
return _data_dir->get_meta()->put(META_COLUMN_FAMILY_INDEX, tablet_gc_key,
- std::to_string(policy->resource_id));
+ gc_pb.SerializeAsString());
}
TabletSchemaSPtr Tablet::tablet_schema() const {
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index da6a5b1c29..756835f1b7 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -588,8 +588,9 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) {
if (_preferred_rowset_type == BETA_ROWSET) {
tablet_meta_pb->set_preferred_rowset_type(_preferred_rowset_type);
}
-
- tablet_meta_pb->set_storage_policy_id(_storage_policy_id);
+ if (_storage_policy_id > 0) {
+ tablet_meta_pb->set_storage_policy_id(_storage_policy_id);
+ }
tablet_meta_pb->set_enable_unique_key_merge_on_write(_enable_unique_key_merge_on_write);
if (_enable_unique_key_merge_on_write) {
diff --git a/be/test/olap/test_data/header_without_inc_rs.txt
b/be/test/olap/test_data/header_without_inc_rs.txt
index 88b0422354..1aa0121ee8 100644
--- a/be/test/olap/test_data/header_without_inc_rs.txt
+++ b/be/test/olap/test_data/header_without_inc_rs.txt
@@ -106,6 +106,5 @@
"preferred_rowset_type": "BETA_ROWSET",
"tablet_type": "TABLET_TYPE_DISK",
"replica_id": 0,
- "enable_unique_key_merge_on_write": false,
- "storage_policy_id": 0
+ "enable_unique_key_merge_on_write": false
}
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 7fad92e321..4452373335 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -115,13 +115,18 @@ message RowsetMetaPB {
optional SegmentsOverlapPB segments_overlap_pb = 51 [default =
OVERLAP_UNKNOWN];
}
-// unused remote rowsets garbage collection kv value
+// kv value for reclaiming remote rowset
message RemoteRowsetGcPB {
required string resource_id = 1;
required int64 tablet_id = 2;
required int64 num_segments = 3;
}
+// kv value for reclaiming all remote rowsets of tablet
+message RemoteTabletGcPB {
+ repeated string resource_ids = 1;
+}
+
enum DataFileType {
OLAP_DATA_FILE = 0; //Deprecated. Only columnar-wise format is supported.
COLUMN_ORIENTED_FILE = 1;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]