This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new afa3c297743 [fix](gclog) Skip tablet dir without schema hash dir in
path gc (#32793)
afa3c297743 is described below
commit afa3c297743715251ad1349594bc60bed0e0a05d
Author: plat1ko <[email protected]>
AuthorDate: Tue Mar 26 17:44:16 2024 +0800
[fix](gclog) Skip tablet dir without schema hash dir in path gc (#32793)
---
be/src/olap/data_dir.cpp | 280 ++++++++++++++++++++---------------------
be/src/olap/data_dir.h | 6 +-
be/src/olap/tablet_manager.cpp | 2 +-
be/test/olap/path_gc_test.cpp | 21 +---
4 files changed, 140 insertions(+), 169 deletions(-)
diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index b47232b9690..d1b5185b789 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -638,171 +638,144 @@ Status DataDir::load() {
return Status::OK();
}
-void DataDir::perform_path_gc() {
- auto tablet_paths = _perform_path_scan();
- _perform_path_gc_by_tablet(tablet_paths);
- _perform_path_gc_by_rowset(tablet_paths);
-}
+// gc unused local tablet dir
+void DataDir::_perform_tablet_gc(const std::string& tablet_schema_hash_path) {
+ if (_stop_bg_worker) {
+ return;
+ }
-// gc unused tablet schemahash dir
-void DataDir::_perform_path_gc_by_tablet(std::vector<std::string>&
tablet_paths) {
- if (_stop_bg_worker || tablet_paths.empty()) {
+ TTabletId tablet_id = -1;
+ TSchemaHash schema_hash = -1;
+ bool is_valid = TabletManager::get_tablet_id_and_schema_hash_from_path(
+ tablet_schema_hash_path, &tablet_id, &schema_hash);
+ if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
+ LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path;
return;
}
- LOG(INFO) << "start to path gc by tablet, dir=" << _path;
- // Use double pointer to avoid move elements after erase a element
- auto forward = tablet_paths.begin();
- auto backward = tablet_paths.end();
- int counter = 0;
- do {
- if (config::path_gc_check_step > 0 && counter %
config::path_gc_check_step == 0) {
- std::this_thread::sleep_for(
-
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
- }
- auto& path = *forward;
- TTabletId tablet_id = -1;
- TSchemaHash schema_hash = -1;
- bool is_valid =
TabletManager::get_tablet_id_and_schema_hash_from_path(path, &tablet_id,
-
&schema_hash);
- if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
- LOG(WARNING) << "unknown path:" << path;
- --backward;
- std::swap(*forward, *backward);
- continue;
- }
- auto tablet = _engine.tablet_manager()->get_tablet(tablet_id);
- if (!tablet || tablet->data_dir() != this) {
- if (tablet) {
- LOG(INFO) << "The tablet in path " << path
- << " is not same with the running one: " <<
tablet->data_dir()->_path
- << "/" << tablet->tablet_path()
- << ", might be the old tablet after migration, try
to move it to trash";
- }
- _engine.tablet_manager()->try_delete_unused_tablet_path(this,
tablet_id, schema_hash,
- path);
- --backward;
- std::swap(*forward, *backward);
- continue;
+ auto tablet = _engine.tablet_manager()->get_tablet(tablet_id);
+ if (!tablet || tablet->data_dir() != this) {
+ if (tablet) {
+ LOG(INFO) << "The tablet in path " << tablet_schema_hash_path
+ << " is not same with the running one: " <<
tablet->data_dir()->_path << "/"
+ << tablet->tablet_path()
+ << ", might be the old tablet after migration, try to
move it to trash";
}
- // could find the tablet, then skip check it
- ++forward;
- } while (forward != backward && !_stop_bg_worker);
- tablet_paths.erase(backward, tablet_paths.end());
- LOG(INFO) << "finish path gc by tablet, dir=" << _path;
+ _engine.tablet_manager()->try_delete_unused_tablet_path(this,
tablet_id, schema_hash,
+
tablet_schema_hash_path);
+ return;
+ }
+
+ _perform_rowset_gc(tablet_schema_hash_path);
}
-void DataDir::_perform_path_gc_by_rowset(const std::vector<std::string>&
tablet_paths) {
- if (_stop_bg_worker || tablet_paths.empty()) {
+// gc unused local rowsets under tablet dir
+void DataDir::_perform_rowset_gc(const std::string& tablet_schema_hash_path) {
+ if (_stop_bg_worker) {
return;
}
- LOG(INFO) << "start to path gc by rowset";
- int counter = 0;
- for (const auto& path : tablet_paths) {
- if (_stop_bg_worker) {
- break;
- }
+ TTabletId tablet_id = -1;
+ TSchemaHash schema_hash = -1;
+ bool is_valid =
doris::TabletManager::get_tablet_id_and_schema_hash_from_path(
+ tablet_schema_hash_path, &tablet_id, &schema_hash);
+ if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
+ LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path;
+ return;
+ }
- ++counter;
- if (config::path_gc_check_step > 0 && counter %
config::path_gc_check_step == 0) {
- std::this_thread::sleep_for(
-
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
- }
- TTabletId tablet_id = -1;
- TSchemaHash schema_hash = -1;
- bool is_valid =
doris::TabletManager::get_tablet_id_and_schema_hash_from_path(
- path, &tablet_id, &schema_hash);
- if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
- LOG(WARNING) << "[path gc] unknown path:" << path;
- continue;
- }
+ auto tablet = _engine.tablet_manager()->get_tablet(tablet_id);
+ if (!tablet) {
+ // Could not found the tablet, maybe it's a dropped tablet, will be
reclaimed
+ // in the next time `_perform_path_gc_by_tablet`
+ return;
+ }
- auto tablet = _engine.tablet_manager()->get_tablet(tablet_id);
- if (!tablet) {
- // Could not found the tablet, maybe it's a dropped tablet, will
be reclaimed
- // in the next time `_perform_path_gc_by_tablet`
- continue;
- }
+ if (tablet->data_dir() != this) {
+ // Current running tablet is not in same data_dir, maybe it's a tablet
after migration,
+ // will be reclaimed in the next time `_perform_path_gc_by_tablet`
+ return;
+ }
- if (tablet->data_dir() != this) {
- // Current running tablet is not in same data_dir, maybe it's a
tablet after migration,
- // will be reclaimed in the next time `_perform_path_gc_by_tablet`
- continue;
+ bool exists;
+ std::vector<io::FileInfo> files;
+ auto st = io::global_local_filesystem()->list(tablet_schema_hash_path,
true, &files, &exists);
+ if (!st.ok()) [[unlikely]] {
+ LOG(WARNING) << "[path gc] fail to list tablet path " <<
tablet_schema_hash_path << " : "
+ << st;
+ return;
+ }
+
+ // Rowset files excluding pending rowsets
+ std::vector<std::pair<RowsetId, std::string /* filename */>>
rowsets_not_pending;
+ for (auto&& file : files) {
+ auto rowset_id = extract_rowset_id(file.file_name);
+ if (rowset_id.hi == 0) {
+ continue; // Not a rowset
}
- bool exists;
- std::vector<io::FileInfo> files;
- auto st = io::global_local_filesystem()->list(path, true, &files,
&exists);
- if (!st.ok()) [[unlikely]] {
- LOG(WARNING) << "[path gc] fail to list tablet path " << path << "
: " << st;
- continue;
+ if (_engine.pending_local_rowsets().contains(rowset_id)) {
+ continue; // Pending rowset file
}
- // Rowset files excluding pending rowsets
- std::vector<std::pair<RowsetId, std::string /* filename */>>
rowsets_not_pending;
- for (auto&& file : files) {
- auto rowset_id = extract_rowset_id(file.file_name);
- if (rowset_id.hi == 0) {
- continue; // Not a rowset
- }
+ rowsets_not_pending.emplace_back(rowset_id, std::move(file.file_name));
+ }
- if (_engine.pending_local_rowsets().contains(rowset_id)) {
- continue; // Pending rowset file
- }
+ RowsetIdUnorderedSet rowsets_in_version_map;
+ tablet->traverse_rowsets(
+ [&rowsets_in_version_map](auto& rs) {
rowsets_in_version_map.insert(rs->rowset_id()); },
+ true);
- rowsets_not_pending.emplace_back(rowset_id,
std::move(file.file_name));
+ auto reclaim_rowset_file = [](const std::string& path) {
+ auto st = io::global_local_filesystem()->delete_file(path);
+ if (!st.ok()) [[unlikely]] {
+ LOG(WARNING) << "[path gc] failed to delete garbage rowset file: "
<< st;
+ return;
}
+ LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit log
+ };
- RowsetIdUnorderedSet rowsets_in_version_map;
- tablet->traverse_rowsets(
- [&rowsets_in_version_map](auto& rs) {
- rowsets_in_version_map.insert(rs->rowset_id());
- },
- true);
+ auto should_reclaim = [&, this](const RowsetId& rowset_id) {
+ return !rowsets_in_version_map.contains(rowset_id) &&
+ !_engine.check_rowset_id_in_unused_rowsets(rowset_id) &&
+ RowsetMetaManager::exists(get_meta(), tablet->tablet_uid(),
rowset_id)
+ .is<META_KEY_NOT_FOUND>();
+ };
- auto reclaim_rowset_file = [](const std::string& path) {
- auto st = io::global_local_filesystem()->delete_file(path);
- if (!st.ok()) [[unlikely]] {
- LOG(WARNING) << "[path gc] failed to delete garbage rowset
file: " << st;
- return;
- }
- LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit
log
- };
-
- auto should_reclaim = [&, this](const RowsetId& rowset_id) {
- return !rowsets_in_version_map.contains(rowset_id) &&
- !_engine.check_rowset_id_in_unused_rowsets(rowset_id) &&
- RowsetMetaManager::exists(get_meta(), tablet->tablet_uid(),
rowset_id)
- .is<META_KEY_NOT_FOUND>();
- };
-
- // rowset_id -> is_garbage
- std::unordered_map<RowsetId, bool> checked_rowsets;
- for (auto&& [rowset_id, filename] : rowsets_not_pending) {
- if (auto it = checked_rowsets.find(rowset_id); it !=
checked_rowsets.end()) {
- if (it->second) { // Is checked garbage rowset
- reclaim_rowset_file(path + '/' + filename);
- }
- continue;
+ // rowset_id -> is_garbage
+ std::unordered_map<RowsetId, bool> checked_rowsets;
+ for (auto&& [rowset_id, filename] : rowsets_not_pending) {
+ if (_stop_bg_worker) {
+ return;
+ }
+
+ if (auto it = checked_rowsets.find(rowset_id); it !=
checked_rowsets.end()) {
+ if (it->second) { // Is checked garbage rowset
+ reclaim_rowset_file(tablet_schema_hash_path + '/' + filename);
}
+ continue;
+ }
- if (should_reclaim(rowset_id)) {
- reclaim_rowset_file(path + '/' + filename);
- checked_rowsets.emplace(rowset_id, true);
- } else {
- checked_rowsets.emplace(rowset_id, false);
+ if (should_reclaim(rowset_id)) {
+ if (config::path_gc_check_step > 0 &&
+ ++_path_gc_step % config::path_gc_check_step == 0) {
+ std::this_thread::sleep_for(
+
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
}
+ reclaim_rowset_file(tablet_schema_hash_path + '/' + filename);
+ checked_rowsets.emplace(rowset_id, true);
+ } else {
+ checked_rowsets.emplace(rowset_id, false);
}
}
- LOG(INFO) << "finish path gc by rowset.";
}
-// path producer
-std::vector<std::string> DataDir::_perform_path_scan() {
- std::vector<std::string> tablet_paths;
- if (_stop_bg_worker) return tablet_paths;
- LOG(INFO) << "start to scan data dir " << _path;
+void DataDir::perform_path_gc() {
+ if (_stop_bg_worker) {
+ return;
+ }
+
+ LOG(INFO) << "start to gc data dir " << _path;
auto data_path = fmt::format("{}/{}", _path, DATA_PREFIX);
std::vector<io::FileInfo> shards;
bool exists = true;
@@ -810,13 +783,18 @@ std::vector<std::string> DataDir::_perform_path_scan() {
auto st = fs->list(data_path, false, &shards, &exists);
if (!st.ok()) [[unlikely]] {
LOG(WARNING) << "failed to scan data dir: " << st;
- return tablet_paths;
+ return;
}
+
for (const auto& shard : shards) {
- if (_stop_bg_worker) break;
+ if (_stop_bg_worker) {
+ break;
+ }
+
if (shard.is_file) {
continue;
}
+
auto shard_path = fmt::format("{}/{}", data_path, shard.file_name);
std::vector<io::FileInfo> tablet_ids;
st = io::global_local_filesystem()->list(shard_path, false,
&tablet_ids, &exists);
@@ -824,31 +802,41 @@ std::vector<std::string> DataDir::_perform_path_scan() {
LOG(WARNING) << "fail to walk dir, shard_path=" << shard_path << "
: " << st;
continue;
}
+
for (const auto& tablet_id : tablet_ids) {
if (_stop_bg_worker) {
break;
}
+
if (tablet_id.is_file) {
continue;
}
+
auto tablet_id_path = fmt::format("{}/{}", shard_path,
tablet_id.file_name);
std::vector<io::FileInfo> schema_hashes;
- st = io::global_local_filesystem()->list(tablet_id_path, false,
&schema_hashes,
- &exists);
+ st = fs->list(tablet_id_path, false, &schema_hashes, &exists);
if (!st.ok()) [[unlikely]] {
LOG(WARNING) << "fail to walk dir, tablet_id_path=" <<
tablet_id_path << " : "
<< st;
continue;
}
- if (schema_hashes.size() != 1 || schema_hashes[0].is_file)
[[unlikely]] {
- LOG(WARNING) << "invalid tablet_path, path=" << tablet_id_path;
- continue;
+
+ for (auto&& schema_hash : schema_hashes) {
+ if (schema_hash.is_file) {
+ continue;
+ }
+
+ if (config::path_gc_check_step > 0 &&
+ ++_path_gc_step % config::path_gc_check_step == 0) {
+ std::this_thread::sleep_for(
+
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
+ }
+ _perform_tablet_gc(tablet_id_path + '/' +
schema_hash.file_name);
}
- tablet_paths.push_back(tablet_id_path + '/' +
schema_hashes[0].file_name);
}
}
- LOG(INFO) << "scan data dir path: " << _path << " finished. path size: "
<< tablet_paths.size();
- return tablet_paths;
+
+ LOG(INFO) << "gc data dir path: " << _path << " finished";
}
Status DataDir::update_capacity() {
diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h
index 9961ccb6a55..cc13121bd1b 100644
--- a/be/src/olap/data_dir.h
+++ b/be/src/olap/data_dir.h
@@ -156,11 +156,11 @@ private:
// process will log fatal.
Status _check_incompatible_old_format_tablet();
- std::vector<std::string> _perform_path_scan();
+ int _path_gc_step {0};
- void _perform_path_gc_by_tablet(std::vector<std::string>& tablet_paths);
+ void _perform_tablet_gc(const std::string& tablet_schema_hash_path);
- void _perform_path_gc_by_rowset(const std::vector<std::string>&
tablet_paths);
+ void _perform_rowset_gc(const std::string& tablet_schema_hash_path);
private:
std::atomic<bool> _stop_bg_worker = false;
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 2fa894c5929..fb34cb11964 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -845,7 +845,7 @@ Status TabletManager::load_tablet_from_meta(DataDir*
data_dir, TTabletId tablet_
// For case 1 doesn't need path check because BE is just starting and not
ready,
// just check tablet meta status to judge whether tablet is delete is
enough.
// For case 2, If a tablet has just been copied to local BE,
- // it may be cleared by gc-thread(see perform_path_gc_by_tablet) because
the tablet meta may not be loaded to memory.
+ // it may be cleared by gc-thread(see perform_tablet_gc) because the
tablet meta may not be loaded to memory.
// So clone task should check path and then failed and retry in this case.
if (check_path) {
bool exists = true;
diff --git a/be/test/olap/path_gc_test.cpp b/be/test/olap/path_gc_test.cpp
index 5d35cead868..1b2c5efbefe 100644
--- a/be/test/olap/path_gc_test.cpp
+++ b/be/test/olap/path_gc_test.cpp
@@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-#include <gen_cpp/Types_types.h>
#include <gtest/gtest.h>
#include <random>
@@ -45,7 +44,7 @@ TEST(PathGcTest, GcTabletAndRowset) {
ASSERT_TRUE(st.ok()) << st;
StorageEngine engine({});
- DataDir data_dir(engine, dir_path, -1, TStorageMedium::HDD);
+ DataDir data_dir(engine, dir_path);
st = data_dir._init_meta();
ASSERT_TRUE(st.ok()) << st;
@@ -82,23 +81,7 @@ TEST(PathGcTest, GcTabletAndRowset) {
ASSERT_TRUE(st.ok()) << st;
}
- // Test path scan
- auto paths = data_dir._perform_path_scan();
- ASSERT_EQ(paths.size(), 20);
-
// Test tablet gc
- config::path_gc_check_step = 0;
- data_dir._perform_path_gc_by_tablet(paths);
- ASSERT_EQ(paths.size(), 10);
- std::vector<std::string_view> expected_paths;
- for (auto&& tablet : active_tablets) {
- expected_paths.emplace_back(tablet->tablet_path());
- }
- std::sort(expected_paths.begin(), expected_paths.end());
- std::sort(paths.begin(), paths.end());
- for (size_t i = 0; i < paths.size(); ++i) {
- EXPECT_EQ(paths[i], expected_paths[i]);
- }
// Prepare rowsets
auto rng = std::default_random_engine
{static_cast<uint32_t>(::time(nullptr))};
@@ -186,7 +169,7 @@ TEST(PathGcTest, GcTabletAndRowset) {
}
// Test rowset gc
- data_dir._perform_path_gc_by_rowset(paths);
+ data_dir.perform_path_gc();
for (auto&& t : active_tablets) {
std::vector<io::FileInfo> files;
bool exists;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]