This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new fc739b9 [threadpool] fix race in ThreadPoolToken::Submit()
fc739b9 is described below
commit fc739b91e0c414bfbcd71669518801484fd021c0
Author: Alexey Serbin <[email protected]>
AuthorDate: Thu Mar 18 20:08:43 2021 -0700
[threadpool] fix race in ThreadPoolToken::Submit()
This patch fixes a race between calling ThreadPoolToken::Submit()
and destructing the token concurrently from another thread.
The patch moves the update of the thread pool token's queue length
metric under the lock used in the ThreadPool::DoSubmit() method.
The motivation for this patch was seeing the following TSAN report
while running Params/ScanYourWritesParamTest.Test/1:
WARNING: ThreadSanitizer: data race (pid=18290)
Write of size 8 at 0x7b2c0002b0e8 by main thread:
#0 operator delete(void*)
#1
std::__1::default_delete<kudu::ThreadPoolToken>::operator()(kudu::ThreadPoolToken*)
#2 std::__1::unique_ptr<kudu::ThreadPoolToken,
std::__1::default_delete<kudu::ThreadPoolToken> >::reset(kudu::ThreadPoolToken*)
#3 std::__1::unique_ptr<kudu::ThreadPoolToken,
std::__1::default_delete<kudu::ThreadPoolToken> >::~unique_ptr()
#4 kudu::consensus::RaftConsensus::~RaftConsensus()
src/kudu/consensus/raft_consensus.cc:210:1
#5 ...
#6 std::__1::__shared_count::__release_shared()
#7 std::__1::__shared_weak_count::__release_shared()
#8 std::__1::shared_ptr<kudu::consensus::RaftConsensus>::~shared_ptr()
#9 kudu::tablet::TabletReplica::~TabletReplica()
src/kudu/tablet/tablet_replica.cc:195:1
.....
Previous read of size 8 at 0x7b2c0002b0e8 by thread T125:
#0 scoped_refptr<kudu::Histogram>::operator kudu::Histogram*
scoped_refptr<kudu::Histogram>::*() const
#1 kudu::ThreadPool::DoSubmit(std::__1::function<void ()>,
kudu::ThreadPoolToken*) src/kudu/util/threadpool.cc:523:7
#2 kudu::ThreadPoolToken::Submit(std::__1::function<void ()>)
src/kudu/util/threadpool.cc:124:17
#3 kudu::consensus::Peer::SignalRequest(bool)
src/kudu/consensus/consensus_peers.cc:188:28
#4 kudu::consensus::Peer::Init()::$_0::operator()()
src/kudu/consensus/consensus_peers.cc:161:14
#5 ...
#6 ...
#7 ...
#8 ...
#9 ...
#10 ...
#11 kudu::rpc::PeriodicTimer::Callback(long)
.....
Change-Id: I2b17e4b2b634624fbc51e8ee05749a56f6609f62
Reviewed-on: http://gerrit.cloudera.org:8080/17205
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Grant Henke <[email protected]>
---
src/kudu/util/threadpool.cc | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/src/kudu/util/threadpool.cc b/src/kudu/util/threadpool.cc
index 9f413ad..6297432 100644
--- a/src/kudu/util/threadpool.cc
+++ b/src/kudu/util/threadpool.cc
@@ -515,14 +515,22 @@ Status ThreadPool::DoSubmit(std::function<void()> f,
ThreadPoolToken* token) {
idle_threads_.pop_front();
}
NotifyLoadMeterUnlocked();
+
+ // Update the token's queue length histogram under a lock. Submitting a task
+ // via a pool token and destroying the token might be run concurrently by
+ // differen threads. If updating the metric after releasing the 'lock_', the
+ // destructor for the token might run after 'lock_' is released since the
+ // other thread might already be executing the code of the ThreadPoolToken's
+ // destructor, just waiting to acquire 'lock_' in
ThreadPoolToken::Shutdown().
+ if (token->metrics_.queue_length_histogram) {
+ token->metrics_.queue_length_histogram->Increment(length_at_submit);
+ }
+
guard.Unlock();
if (metrics_.queue_length_histogram) {
metrics_.queue_length_histogram->Increment(length_at_submit);
}
- if (token->metrics_.queue_length_histogram) {
- token->metrics_.queue_length_histogram->Increment(length_at_submit);
- }
if (need_a_thread) {
Status status = CreateThread();