This is an automated email from the ASF dual-hosted git repository. granthenke pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 4c53ce892a083ff660f4b7af2374fd6b273329f8 Author: Andrew Wong <[email protected]> AuthorDate: Wed Apr 28 21:26:41 2021 -0700 KUDU-2612: fix race when adopting partition lock The way we were transferring the partition lock to the Txn wasn't thread-safe. This patch wraps the critical section with a spinlock. This patch includes a test that would trigger a TSAN error. Change-Id: Ifc7ac7f474baf308860847298355b300c76d9ef5 Reviewed-on: http://gerrit.cloudera.org:8080/17361 Reviewed-by: Alexey Serbin <[email protected]> Tested-by: Andrew Wong <[email protected]> --- src/kudu/tablet/txn_participant-test.cc | 20 ++++++++++++++++++++ src/kudu/tablet/txn_participant.cc | 1 + src/kudu/tablet/txn_participant.h | 2 ++ 3 files changed, 23 insertions(+) diff --git a/src/kudu/tablet/txn_participant-test.cc b/src/kudu/tablet/txn_participant-test.cc index cedcebd..41ceb44 100644 --- a/src/kudu/tablet/txn_participant-test.cc +++ b/src/kudu/tablet/txn_participant-test.cc @@ -39,6 +39,7 @@ #include "kudu/common/iterator.h" #include "kudu/common/partial_row.h" #include "kudu/common/row_operations.h" +#include "kudu/common/row_operations.pb.h" #include "kudu/common/schema.h" #include "kudu/common/timestamp.h" #include "kudu/common/wire_protocol.h" @@ -389,6 +390,25 @@ TEST_F(TxnParticipantTest, TestConcurrentTransactions) { } } +TEST_F(TxnParticipantTest, TestConcurrentWrites) { + constexpr const auto kNumRows = 10; + constexpr const auto kTxnId = 0; + ParticipantResponsePB resp; + ASSERT_OK(CallParticipantOp(tablet_replica_.get(), kTxnId, + ParticipantOpPB::BEGIN_TXN, -1, &resp)); + vector<thread> threads; + Status statuses[kNumRows]; + for (int i = 0; i < kNumRows; i++) { + threads.emplace_back([&, i] { + statuses[i] = Write(i, kTxnId); + }); + } + std::for_each(threads.begin(), threads.end(), [] (thread& t) { t.join(); }); + for (const auto& s : statuses) { + EXPECT_OK(s); + } +} + // Concurrently try to apply every op and test, based on the results, that some // invariants are maintained. TEST_F(TxnParticipantTest, TestConcurrentOps) { diff --git a/src/kudu/tablet/txn_participant.cc b/src/kudu/tablet/txn_participant.cc index 4a63d53..1f72eb5 100644 --- a/src/kudu/tablet/txn_participant.cc +++ b/src/kudu/tablet/txn_participant.cc @@ -77,6 +77,7 @@ void Txn::AcquireReadLock(shared_lock<rw_semaphore>* txn_lock) { void Txn::AdoptPartitionLock(ScopedPartitionLock partition_lock) { if (PREDICT_TRUE(FLAGS_enable_txn_partition_lock)) { TabletServerErrorPB::Code code = tserver::TabletServerErrorPB::UNKNOWN_ERROR; + std::lock_guard<simple_spinlock> l(lock_); #ifndef NDEBUG CHECK(partition_lock.IsAcquired(&code)) << code; if (partition_lock_.IsAcquired(&code)) { diff --git a/src/kudu/tablet/txn_participant.h b/src/kudu/tablet/txn_participant.h index 92b7b00..43800ae 100644 --- a/src/kudu/tablet/txn_participant.h +++ b/src/kudu/tablet/txn_participant.h @@ -265,6 +265,7 @@ class Txn : public RefCountedThreadSafe<Txn> { } void ReleasePartitionLock() { + std::lock_guard<simple_spinlock> l(lock_); partition_lock_.Release(); } @@ -358,6 +359,7 @@ class Txn : public RefCountedThreadSafe<Txn> { std::unique_ptr<ScopedOp> commit_op_; // Holds the partition lock acquired for this transaction. + simple_spinlock lock_; ScopedPartitionLock partition_lock_; DISALLOW_COPY_AND_ASSIGN(Txn);
