This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 5fccfbc2b [master] fix race in auto leader rebalancing
5fccfbc2b is described below

commit 5fccfbc2bda0d017d283cbefcb3cb8e4b026c8e9
Author: Alexey Serbin <ale...@apache.org>
AuthorDate: Thu May 9 12:32:22 2024 -0700

    [master] fix race in auto leader rebalancing
    
    It turned out that auto leader rebalancing task wasn't explicitly
    shutdown upon shutting down catalog manager.  That lead to race
    conditions as reported by TSAN, at least in test scenarios (see below).
    This patch addresses the issue.
    
      WARNING: ThreadSanitizer: data race (pid=23827)
        Write of size 1 at 0x7b4000008208 by main thread:
          #0 AnnotateRWLockDestroy 
thirdparty/src/llvm-11.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp:264
 (auto_rebalancer-test+0x33575e)
          #1 kudu::rw_spinlock::~rw_spinlock() src/kudu/util/locks.h:89:5 
(libmaster.so+0x359376)
          #2 kudu::master::TSManager::~TSManager() 
src/kudu/master/ts_manager.cc:108:1 (libmaster.so+0x4ad201)
          #3 kudu::master::TSManager::~TSManager() 
src/kudu/master/ts_manager.cc:107:25 (libmaster.so+0x4ad229)
          #4 
std::__1::default_delete<kudu::master::TSManager>::operator()(kudu::master::TSManager*)
 const thirdparty/installed/tsan/include/c++/v1/memory:2262:5 
(libmaster.so+0x407ce7)
          #5 std::__1::unique_ptr<kudu::master::TSManager, 
std::__1::default_delete<kudu::master::TSManager> 
>::reset(kudu::master::TSManager*) 
thirdparty/installed/tsan/include/c++/v1/memory:2517:7 (libmaster.so+0x40157d)
          #6 std::__1::unique_ptr<kudu::master::TSManager, 
std::__1::default_delete<kudu::master::TSManager> >::~unique_ptr() 
thirdparty/installed/tsan/include/c++/v1/memory:2471:19 (libmaster.so+0x4015eb)
          #7 kudu::master::Master::~Master() src/kudu/master/master.cc:263:1 
(libmaster.so+0x3f7a4a)
          #8 kudu::master::Master::~Master() src/kudu/master/master.cc:261:19 
(libmaster.so+0x3f7dc9)
          #9 
std::__1::default_delete<kudu::master::Master>::operator()(kudu::master::Master*)
 const thirdparty/installed/tsan/include/c++/v1/memory:2262:5 
(libmaster.so+0x435627)
          #10 std::__1::unique_ptr<kudu::master::Master, 
std::__1::default_delete<kudu::master::Master> >::reset(kudu::master::Master*) 
thirdparty/installed/tsan/include/c++/v1/memory:2517:7 (libmaster.so+0x42e6ed)
          #11 kudu::master::MiniMaster::Shutdown() 
src/kudu/master/mini_master.cc:120:13 (libmaster.so+0x4c2612)
        ...
        Previous atomic write of size 4 at 0x7b4000008208 by thread T439 
(mutexes: write M1141235379631443968):
          #0 __tsan_atomic32_compare_exchange_strong 
thirdparty/src/llvm-11.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp:780
 (auto_rebalancer-test+0x33eb60)
          #1 base::subtle::Release_CompareAndSwap(int volatile*, int, int) 
/src/kudu/gutil/atomicops-internals-tsan.h:88:3 (libmaster.so+0x2e2b34)
          #2 kudu::rw_semaphore::unlock_shared() 
src/kudu/util/rw_semaphore.h:91:19 (libmaster.so+0x2e29c8)
          #3 kudu::rw_spinlock::unlock_shared() src/kudu/util/locks.h:99:10 
(libmaster.so+0x2e28ef)
          #4 std::__1::shared_lock<kudu::rw_spinlock>::~shared_lock() 
/thirdparty/installed/tsan/include/c++/v1/shared_mutex:369:19 
(libmaster.so+0x2e23e0)
          #5 
kudu::master::TSManager::GetAllDescriptors(std::__1::vector<std::__1::shared_ptr<kudu::master::TSDescriptor>,
 std::__1::allocator<std::__1::shared_ptr<kudu::master::TSDescriptor> > >*) 
const src/kudu/master/ts_manager.cc:206:1 (libmaster.so+0x4adeb6)
          #6 kudu::master::AutoLeaderRebalancerTask::RunLeaderRebalancer() 
src/kudu/master/auto_leader_rebalancer.cc:405:16 (libmaster.so+0x2fb51b)
          #7 kudu::master::AutoLeaderRebalancerTask::RunLoop() 
src/kudu/master/auto_leader_rebalancer.cc:445:7 (libmaster.so+0x2fbaa9)
    
    This is a follow-up to 10efaf2c77dfe5e4474505e0267c583c011703be.
    
    Change-Id: Iccd66d00280d22b37386230874937e5260f07f3b
    Reviewed-on: http://gerrit.cloudera.org:8080/21417
    Reviewed-by: Wang Xixu <1450306...@qq.com>
    Tested-by: Alexey Serbin <ale...@apache.org>
    Reviewed-by: Yifan Zhang <chinazhangyi...@163.com>
---
 src/kudu/master/auto_leader_rebalancer.cc | 6 +++++-
 src/kudu/master/catalog_manager.cc        | 4 ++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/kudu/master/auto_leader_rebalancer.cc 
b/src/kudu/master/auto_leader_rebalancer.cc
index 629afb77d..fdf058623 100644
--- a/src/kudu/master/auto_leader_rebalancer.cc
+++ b/src/kudu/master/auto_leader_rebalancer.cc
@@ -101,7 +101,11 @@ 
AutoLeaderRebalancerTask::AutoLeaderRebalancerTask(CatalogManager* catalog_manag
       number_of_loop_iterations_for_test_(0),
       moves_scheduled_this_round_for_test_(0) {}
 
-AutoLeaderRebalancerTask::~AutoLeaderRebalancerTask() { Shutdown(); }
+AutoLeaderRebalancerTask::~AutoLeaderRebalancerTask() {
+  if (thread_) {
+    Shutdown();
+  }
+}
 
 Status AutoLeaderRebalancerTask::Init() {
   DCHECK(!thread_) << "AutoleaderRebalancerTask is already initialized";
diff --git a/src/kudu/master/catalog_manager.cc 
b/src/kudu/master/catalog_manager.cc
index 4dc260801..42aeee833 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -1727,6 +1727,10 @@ void CatalogManager::Shutdown() {
     auto_rebalancer_->Shutdown();
   }
 
+  if (auto_leader_rebalancer_) {
+    auto_leader_rebalancer_->Shutdown();
+  }
+
   // Mark all outstanding table tasks as aborted and wait for them to fail.
   //
   // There may be an outstanding table visitor thread modifying the table map,

Reply via email to