This is an automated email from the ASF dual-hosted git repository.
awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new c2e5373 [txns] fix race between tasks and destructor
c2e5373 is described below
commit c2e5373493afe43766b0d5b6f04dfbcd340d633d
Author: Andrew Wong <[email protected]>
AuthorDate: Wed Apr 7 16:13:50 2021 -0700
[txns] fix race between tasks and destructor
It previously possible for a CommitTask to be destructed before
completing the loop of scheduling all asynchronous tasks. This led to a
race as seen below:
WARNING: ThreadSanitizer: data race (pid=32435)
Write of size 8 at 0x7b1c000ce2d8 by thread T105 (mutexes: write
M424881254664896540):
#0 std::__1::__vector_base<std::__1::basic_string<char,
std::__1::char_traits<char>, std::__1::allocator<char> >,
std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>,
std::__1::allocator<char> > > >::__destruct_at_end(std::__1::basic_string<char,
std::__1::char_traits<char>, std::__1::allocator<char> >*)
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/vector:427:12
(txn_commit-itest+0x576cb1)
#1 std::__1::__vector_base<std::__1::basic_string<char,
std::__1::char_traits<char>, std::__1::allocator<char> >,
std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>,
std::__1::allocator<char> > > >::clear()
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/vector:369:29
(txn_commit-itest+0x5770d1)
#2 std::__1::__vector_base<std::__1::basic_string<char,
std::__1::char_traits<char>, std::__1::allocator<char> >,
std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>,
std::__1::allocator<char> > > >::~__vector_base()
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/vector:463:9
(txn_commit-itest+0x59caf9)
#3 std::__1::vector<std::__1::basic_string<char,
std::__1::char_traits<char>, std::__1::allocator<char> >,
std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>,
std::__1::allocator<char> > > >::~vector()
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/vector:555:5
(libtransactions.so+0x8c2a0)
#4 kudu::transactions::CommitTasks::~CommitTasks()
../src/kudu/transactions/txn_status_manager.h:177:26
(libtransactions.so+0xcce8b)
#5 kudu::RefCountedThreadSafe<kudu::transactions::CommitTasks,
kudu::DefaultRefCountedThreadSafeTraits<kudu::transactions::CommitTasks>
>::DeleteInternal(kudu::transactions::CommitTasks const*)
../src/kudu/gutil/ref_counted.h:153:44 (libtransactions.so+0xcce1a)
#6
kudu::DefaultRefCountedThreadSafeTraits<kudu::transactions::CommitTasks>::Destruct(kudu::transactions::CommitTasks
const*) ../src/kudu/gutil/ref_counted.h:116:5 (libtransactions.so+0xccdc8)
#7 kudu::RefCountedThreadSafe<kudu::transactions::CommitTasks,
kudu::DefaultRefCountedThreadSafeTraits<kudu::transactions::CommitTasks>
>::Release() const ../src/kudu/gutil/ref_counted.h:144:7
(libtransactions.so+0xccd70)
#8 scoped_refptr<kudu::transactions::CommitTasks>::~scoped_refptr()
../src/kudu/gutil/ref_counted.h:266:13 (libtransactions.so+0xbf785)
#9 std::__1::pair<long const,
scoped_refptr<kudu::transactions::CommitTasks> >::~pair()
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/utility:315:29
(libtransactions.so+0xc7652)
#10 void
std::__1::allocator_traits<std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*> >
>::__destroy<std::__1::pair<long const,
scoped_refptr<kudu::transactions::CommitTasks> >
>(std::__1::integral_constant<bool, false>,
std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*> >&,
std::__1::pair<long const, scoped_refptr<kud [...]
#11 void
std::__1::allocator_traits<std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*> >
>::destroy<std::__1::pair<long const,
scoped_refptr<kudu::transactions::CommitTasks> >
>(std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*> >&,
std::__1::pair<long const, scoped_refptr<kudu::transactions::CommitTasks> >*)
/data/3/aw [...]
#12
std::__1::__hash_node_destructor<std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*> >
>::operator()(std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*>*)
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/__hash_table:844:13
(libtransactions.so+0xc740d)
#13
std::__1::unique_ptr<std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*>,
std::__1::__hash_node_destructor<std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*> > >
>::reset(std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*>*)
/data/3/awong/Repositories/kudu/thirdparty/installed [...]
#14
std::__1::unique_ptr<std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*>,
std::__1::__hash_node_destructor<std::__1::allocator<std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*> > > >::~unique_ptr()
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/memory:2547:19
(libtransactions.so+0xc6cbc)
#15 std::__1::__hash_table<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >,
std::__1::__unordered_map_hasher<long, std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, std::__1::hash<long>, true>,
std::__1::__unordered_map_equal<long, std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, std::__1::equal_to<long>,
true>, std::__1::allocator<std::__1::__hash_value_type<long, scoped_refptr [...]
#16 std::__1::unordered_map<long,
scoped_refptr<kudu::transactions::CommitTasks>, std::__1::hash<long>,
std::__1::equal_to<long>, std::__1::allocator<std::__1::pair<long const,
scoped_refptr<kudu::transactions::CommitTasks> > >
>::erase(std::__1::__hash_map_iterator<std::__1::__hash_iterator<std::__1::__hash_node<std::__1::__hash_value_type<long,
scoped_refptr<kudu::transactions::CommitTasks> >, void*>*> >)
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/u [...]
#17 kudu::transactions::TxnStatusManager::RemoveCommitTask(long,
kudu::transactions::CommitTasks const*)
../src/kudu/transactions/txn_status_manager.h:433:26
(libtransactions.so+0xbefc6)
#18 kudu::transactions::CommitTasks::IsShuttingDownCleanupIfLastOp()
../src/kudu/transactions/txn_status_manager.cc:181:28
(libtransactions.so+0x97dea)
#19
kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2::operator()(kudu::Status
const&) const ../src/kudu/transactions/txn_status_manager.cc:319:9
(libtransactions.so+0xaefd6)
#20
decltype(std::__1::forward<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2&>(fp)(std::__1::forward<kudu::Status
const&>(fp0)))
std::__1::__invoke<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2&,
kudu::Status
const&>(kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2&,
kudu::Status const&)
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/type_traits:3530:1
(libtransactions.so+0xaeefd)
#21 void
std::__1::__invoke_void_return_wrapper<void>::__call<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2&,
kudu::Status
const&>(kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2&,
kudu::Status const&)
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/__functional_base:348:9
(libtransactions.so+0xaee3d)
#22
std::__1::__function::__alloc_func<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2,
std::__1::allocator<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2>,
void (kudu::Status const&)>::operator()(kudu::Status const&)
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1533:16
(libtransactions.so+0xaedbd)
#23
std::__1::__function::__func<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2,
std::__1::allocator<kudu::transactions::CommitTasks::AbortTxnAsyncTask(int)::$_2>,
void (kudu::Status const&)>::operator()(kudu::Status const&)
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1707:12
(libtransactions.so+0xad06c)
#24 std::__1::__function::__value_func<void (kudu::Status
const&)>::operator()(kudu::Status const&) const
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1860:16
(libmaster.so+0x32ca24)
#25 std::__1::function<void (kudu::Status
const&)>::operator()(kudu::Status const&) const
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:2419:12
(libmaster.so+0x31d80b)
#26 kudu::transactions::ParticipantRpc::Finish(kudu::Status const&)
../src/kudu/transactions/participant_rpc.cc:227:3 (libtransactions.so+0x7f3e7)
...
Previous read of size 8 at 0x7b1c000ce2d8 by thread T186 (mutexes: read
M322424363142217872):
#0 std::__1::vector<std::__1::basic_string<char,
std::__1::char_traits<char>, std::__1::allocator<char> >,
std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>,
std::__1::allocator<char> > > >::size() const
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/vector:656:46
(libtransactions.so+0x8d2f9)
#1 kudu::transactions::CommitTasks::AbortTxnAsync()
../src/kudu/transactions/txn_status_manager.cc:365:42
(libtransactions.so+0x989d2)
#2 kudu::transactions::TxnStatusManager::BeginAbortTransaction(long,
boost::optional<std::__1::basic_string<char, std::__1::char_traits<char>,
std::__1::allocator<char> > > const&, kudu::tserver::TabletServerErrorPB*)
../src/kudu/transactions/txn_status_manager.cc:1219:25
(libtransactions.so+0xa3cc6)
#3
kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3::operator()()
const ../src/kudu/transactions/txn_status_manager.cc:378:3
(libtransactions.so+0xb245d)
#4
decltype(std::__1::forward<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3&>(fp)())
std::__1::__invoke<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3&>(kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3&)
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/type_traits:3530:1
(libtransactions.so+0xb2180)
#5 void
std::__1::__invoke_void_return_wrapper<void>::__call<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3&>(kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3&)
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/__functional_base:348:9
(libtransactions.so+0xb20e0)
#6
std::__1::__function::__alloc_func<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3,
std::__1::allocator<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3>,
void ()>::operator()()
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1533:16
(libtransactions.so+0xb2080)
#7
std::__1::__function::__func<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3,
std::__1::allocator<kudu::transactions::CommitTasks::ScheduleBeginAbortTxnWrite()::$_3>,
void ()>::operator()()
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1707:12
(libtransactions.so+0xb042f)
#8 std::__1::__function::__value_func<void ()>::operator()() const
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:1860:16
(libtserver_test_util.so+0x58396)
#9 std::__1::function<void ()>::operator()() const
/data/3/awong/Repositories/kudu/thirdparty/installed/tsan/include/c++/v1/functional:2419:12
(libtserver_test_util.so+0x58098)
...
This patch fixes this by caching the size before iterating. Prior to
this patch, the test failed in TSAN mode 3/100 times. With this patch,
it passed 1000/1000 times.
Change-Id: Ic974354b300f2a6c1b04505e740249273f33b80c
Reviewed-on: http://gerrit.cloudera.org:8080/17283
Reviewed-by: Alexey Serbin <[email protected]>
Tested-by: Kudu Jenkins
---
src/kudu/transactions/txn_status_manager.cc | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/src/kudu/transactions/txn_status_manager.cc
b/src/kudu/transactions/txn_status_manager.cc
index 837272c..1a2e63e 100644
--- a/src/kudu/transactions/txn_status_manager.cc
+++ b/src/kudu/transactions/txn_status_manager.cc
@@ -361,8 +361,11 @@ void CommitTasks::AbortTxnAsync() {
if (participant_ids_.empty()) {
ScheduleFinalizeAbortTxnWrite();
} else {
- ops_in_flight_ = participant_ids_.size();
- for (int i = 0; i < participant_ids_.size(); i++) {
+ // NOTE: the final AbortTxnAsyncTask() call may destruct this CommitTask
+ // and its members, so cache the participant ID size.
+ const auto participant_ids_size = participant_ids_.size();
+ ops_in_flight_ = participant_ids_size;
+ for (int i = 0; i < participant_ids_size; i++) {
AbortTxnAsyncTask(i);
}
}
@@ -434,8 +437,11 @@ void CommitTasks::FinalizeCommitAsync() {
// tasks to complete.
auto old_val = ops_in_flight_.exchange(participant_ids_.size());
DCHECK_EQ(0, old_val);
- ops_in_flight_ = participant_ids_.size();
- for (int i = 0; i < participant_ids_.size(); i++) {
+ // NOTE: the final FinalizeCommitAsyncTask() call may destruct this
+ // CommitTask and its members, so cache the participant ID size.
+ const auto participant_ids_size = participant_ids_.size();
+ ops_in_flight_ = participant_ids_size;
+ for (int i = 0; i < participant_ids_size; i++) {
FinalizeCommitAsyncTask(i);
}
}
@@ -1010,10 +1016,14 @@ void CommitTasks::BeginCommitAsync() {
// If there are some participants, schedule beginning commit tasks so
// we can determine a finalized commit timestamp.
//
+ // NOTE: the final BeginCommitAsyncTask() call may destruct this CommitTask
+ // and its members, so cache the participant ID size.
+ //
// TODO(awong): consider an approach in which clients propagate
// timestamps in such a way that the client's call to begin commit
// includes the expected finalized commit timestamp.
- for (int i = 0; i < participant_ids_.size(); i++) {
+ const auto participant_ids_size = participant_ids_.size();
+ for (int i = 0; i < participant_ids_size; i++) {
BeginCommitAsyncTask(i);
}
}