This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push: new 5fccfbc2b [master] fix race in auto leader rebalancing 5fccfbc2b is described below commit 5fccfbc2bda0d017d283cbefcb3cb8e4b026c8e9 Author: Alexey Serbin <ale...@apache.org> AuthorDate: Thu May 9 12:32:22 2024 -0700 [master] fix race in auto leader rebalancing It turned out that auto leader rebalancing task wasn't explicitly shutdown upon shutting down catalog manager. That lead to race conditions as reported by TSAN, at least in test scenarios (see below). This patch addresses the issue. WARNING: ThreadSanitizer: data race (pid=23827) Write of size 1 at 0x7b4000008208 by main thread: #0 AnnotateRWLockDestroy thirdparty/src/llvm-11.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp:264 (auto_rebalancer-test+0x33575e) #1 kudu::rw_spinlock::~rw_spinlock() src/kudu/util/locks.h:89:5 (libmaster.so+0x359376) #2 kudu::master::TSManager::~TSManager() src/kudu/master/ts_manager.cc:108:1 (libmaster.so+0x4ad201) #3 kudu::master::TSManager::~TSManager() src/kudu/master/ts_manager.cc:107:25 (libmaster.so+0x4ad229) #4 std::__1::default_delete<kudu::master::TSManager>::operator()(kudu::master::TSManager*) const thirdparty/installed/tsan/include/c++/v1/memory:2262:5 (libmaster.so+0x407ce7) #5 std::__1::unique_ptr<kudu::master::TSManager, std::__1::default_delete<kudu::master::TSManager> >::reset(kudu::master::TSManager*) thirdparty/installed/tsan/include/c++/v1/memory:2517:7 (libmaster.so+0x40157d) #6 std::__1::unique_ptr<kudu::master::TSManager, std::__1::default_delete<kudu::master::TSManager> >::~unique_ptr() thirdparty/installed/tsan/include/c++/v1/memory:2471:19 (libmaster.so+0x4015eb) #7 kudu::master::Master::~Master() src/kudu/master/master.cc:263:1 (libmaster.so+0x3f7a4a) #8 kudu::master::Master::~Master() src/kudu/master/master.cc:261:19 (libmaster.so+0x3f7dc9) #9 std::__1::default_delete<kudu::master::Master>::operator()(kudu::master::Master*) const thirdparty/installed/tsan/include/c++/v1/memory:2262:5 (libmaster.so+0x435627) #10 std::__1::unique_ptr<kudu::master::Master, std::__1::default_delete<kudu::master::Master> >::reset(kudu::master::Master*) thirdparty/installed/tsan/include/c++/v1/memory:2517:7 (libmaster.so+0x42e6ed) #11 kudu::master::MiniMaster::Shutdown() src/kudu/master/mini_master.cc:120:13 (libmaster.so+0x4c2612) ... Previous atomic write of size 4 at 0x7b4000008208 by thread T439 (mutexes: write M1141235379631443968): #0 __tsan_atomic32_compare_exchange_strong thirdparty/src/llvm-11.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp:780 (auto_rebalancer-test+0x33eb60) #1 base::subtle::Release_CompareAndSwap(int volatile*, int, int) /src/kudu/gutil/atomicops-internals-tsan.h:88:3 (libmaster.so+0x2e2b34) #2 kudu::rw_semaphore::unlock_shared() src/kudu/util/rw_semaphore.h:91:19 (libmaster.so+0x2e29c8) #3 kudu::rw_spinlock::unlock_shared() src/kudu/util/locks.h:99:10 (libmaster.so+0x2e28ef) #4 std::__1::shared_lock<kudu::rw_spinlock>::~shared_lock() /thirdparty/installed/tsan/include/c++/v1/shared_mutex:369:19 (libmaster.so+0x2e23e0) #5 kudu::master::TSManager::GetAllDescriptors(std::__1::vector<std::__1::shared_ptr<kudu::master::TSDescriptor>, std::__1::allocator<std::__1::shared_ptr<kudu::master::TSDescriptor> > >*) const src/kudu/master/ts_manager.cc:206:1 (libmaster.so+0x4adeb6) #6 kudu::master::AutoLeaderRebalancerTask::RunLeaderRebalancer() src/kudu/master/auto_leader_rebalancer.cc:405:16 (libmaster.so+0x2fb51b) #7 kudu::master::AutoLeaderRebalancerTask::RunLoop() src/kudu/master/auto_leader_rebalancer.cc:445:7 (libmaster.so+0x2fbaa9) This is a follow-up to 10efaf2c77dfe5e4474505e0267c583c011703be. Change-Id: Iccd66d00280d22b37386230874937e5260f07f3b Reviewed-on: http://gerrit.cloudera.org:8080/21417 Reviewed-by: Wang Xixu <1450306...@qq.com> Tested-by: Alexey Serbin <ale...@apache.org> Reviewed-by: Yifan Zhang <chinazhangyi...@163.com> --- src/kudu/master/auto_leader_rebalancer.cc | 6 +++++- src/kudu/master/catalog_manager.cc | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/kudu/master/auto_leader_rebalancer.cc b/src/kudu/master/auto_leader_rebalancer.cc index 629afb77d..fdf058623 100644 --- a/src/kudu/master/auto_leader_rebalancer.cc +++ b/src/kudu/master/auto_leader_rebalancer.cc @@ -101,7 +101,11 @@ AutoLeaderRebalancerTask::AutoLeaderRebalancerTask(CatalogManager* catalog_manag number_of_loop_iterations_for_test_(0), moves_scheduled_this_round_for_test_(0) {} -AutoLeaderRebalancerTask::~AutoLeaderRebalancerTask() { Shutdown(); } +AutoLeaderRebalancerTask::~AutoLeaderRebalancerTask() { + if (thread_) { + Shutdown(); + } +} Status AutoLeaderRebalancerTask::Init() { DCHECK(!thread_) << "AutoleaderRebalancerTask is already initialized"; diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc index 4dc260801..42aeee833 100644 --- a/src/kudu/master/catalog_manager.cc +++ b/src/kudu/master/catalog_manager.cc @@ -1727,6 +1727,10 @@ void CatalogManager::Shutdown() { auto_rebalancer_->Shutdown(); } + if (auto_leader_rebalancer_) { + auto_leader_rebalancer_->Shutdown(); + } + // Mark all outstanding table tasks as aborted and wait for them to fail. // // There may be an outstanding table visitor thread modifying the table map,