This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 11c81822e3e0f71f942c12c310162037c8159600
Author: yujun <[email protected]>
AuthorDate: Mon Oct 16 20:25:20 2023 +0800

    [fix](path gc) fix data dir path gc (#25420)
---
 be/src/olap/data_dir.cpp    | 25 +++++++++++++++++--------
 be/src/olap/data_dir.h      |  8 +++++---
 be/src/olap/olap_server.cpp |  8 +++-----
 3 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index 9e01b75ba89..393db80d643 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -603,14 +603,25 @@ void DataDir::remove_pending_ids(const std::string& id) {
     _pending_path_ids.erase(id);
 }
 
-// gc unused tablet schemahash dir
-void DataDir::perform_path_gc_by_tablet() {
+void DataDir::perform_path_gc() {
     std::unique_lock<std::mutex> lck(_check_path_mutex);
-    _check_path_cv.wait(
-            lck, [this] { return _stop_bg_worker || !_all_tablet_schemahash_paths.empty(); });
+    _check_path_cv.wait(lck, [this] {
+        return _stop_bg_worker || !_all_tablet_schemahash_paths.empty() ||
+               !_all_check_paths.empty();
+    });
     if (_stop_bg_worker) {
         return;
     }
+
+    _perform_path_gc_by_tablet();
+    _perform_path_gc_by_rowsetid();
+}
+
+// gc unused tablet schemahash dir
+void DataDir::_perform_path_gc_by_tablet() {
+    if (_all_tablet_schemahash_paths.empty()) {
+        return;
+    }
     LOG(INFO) << "start to path gc by tablet schemahash.";
     int counter = 0;
     for (const auto& path : _all_tablet_schemahash_paths) {
@@ -652,12 +663,10 @@ void DataDir::perform_path_gc_by_tablet() {
     LOG(INFO) << "finished one time path gc by tablet.";
 }
 
-void DataDir::perform_path_gc_by_rowsetid() {
+void DataDir::_perform_path_gc_by_rowsetid() {
     // init the set of valid path
     // validate the path in data dir
-    std::unique_lock<std::mutex> lck(_check_path_mutex);
-    _check_path_cv.wait(lck, [this] { return _stop_bg_worker || !_all_check_paths.empty(); });
-    if (_stop_bg_worker) {
+    if (_all_check_paths.empty()) {
         return;
     }
     LOG(INFO) << "start to path gc by rowsetid.";
diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h
index 7d6684ad9b3..51261a19b1b 100644
--- a/be/src/olap/data_dir.h
+++ b/be/src/olap/data_dir.h
@@ -114,9 +114,7 @@ public:
     // this is a producer function. After scan, it will notify the perform_path_gc function to gc
     Status perform_path_scan();
 
-    void perform_path_gc_by_rowsetid();
-
-    void perform_path_gc_by_tablet();
+    void perform_path_gc();
 
     void perform_remote_rowset_gc();
 
@@ -171,6 +169,10 @@ private:
 
     bool _check_pending_ids(const std::string& id);
 
+    void _perform_path_gc_by_tablet();
+
+    void _perform_path_gc_by_rowsetid();
+
 private:
     std::atomic<bool> _stop_bg_worker = false;
 
diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index 369b0c48d87..401495bed2b 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -358,11 +358,8 @@ void StorageEngine::_path_gc_thread_callback(DataDir* data_dir) {
     LOG(INFO) << "try to start path gc thread!";
     int32_t interval = config::path_gc_check_interval_second;
     do {
-        LOG(INFO) << "try to perform path gc by tablet!";
-        data_dir->perform_path_gc_by_tablet();
-
-        LOG(INFO) << "try to perform path gc by rowsetid!";
-        data_dir->perform_path_gc_by_rowsetid();
+        LOG(INFO) << "try to perform path gc!";
+        data_dir->perform_path_gc();
 
         interval = config::path_gc_check_interval_second;
         if (interval <= 0) {
@@ -372,6 +369,7 @@ void StorageEngine::_path_gc_thread_callback(DataDir* data_dir) {
         }
     } while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval)) &&
              !k_doris_exit);
+    LOG(INFO) << "stop path gc thread!";
 }
 
 void StorageEngine::_path_scan_thread_callback(DataDir* data_dir) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
