This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new f191638  txn_participant-itest: deflake TestFrequentElections
f191638 is described below

commit f191638bca891b36a6ef396fb9aa1f5c8f6af776
Author: Andrew Wong <[email protected]>
AuthorDate: Wed Sep 30 21:03:44 2020 -0700

    txn_participant-itest: deflake TestFrequentElections
    
    I saw this failing a handful of times in a batch of 300, running in TSAN
    mode. With this patch, the test passed 500/500 times.
    
    Change-Id: I71e168c79276ec2ebbb7561dccb7cef69723c544
    Reviewed-on: http://gerrit.cloudera.org:8080/16525
    Tested-by: Andrew Wong <[email protected]>
    Reviewed-by: Grant Henke <[email protected]>
    Reviewed-by: Alexey Serbin <[email protected]>
---
 .../integration-tests/txn_participant-itest.cc     | 36 ++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/kudu/integration-tests/txn_participant-itest.cc 
b/src/kudu/integration-tests/txn_participant-itest.cc
index 44f8ae4..1656bd2 100644
--- a/src/kudu/integration-tests/txn_participant-itest.cc
+++ b/src/kudu/integration-tests/txn_participant-itest.cc
@@ -31,6 +31,7 @@
 #include "kudu/common/wire_protocol.h"
 #include "kudu/consensus/raft_consensus.h"
 #include "kudu/gutil/ref_counted.h"
+#include "kudu/gutil/strings/join.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/integration-tests/test_workload.h"
 #include "kudu/mini-cluster/internal_mini_cluster.h"
@@ -64,6 +65,7 @@ using kudu::tablet::TxnParticipant;
 using kudu::tserver::ParticipantOpPB;
 using kudu::tserver::ParticipantRequestPB;
 using kudu::tserver::ParticipantResponsePB;
+using std::string;
 using std::thread;
 using std::unique_ptr;
 using std::vector;
@@ -93,6 +95,14 @@ vector<Status> RunOnReplicas(const vector<TabletReplica*>& 
replicas,
   }
   return statuses;
 }
+string TxnsAsString(const vector<TxnParticipant::TxnEntry>& txns) {
+  return JoinMapped(txns,
+      [](const TxnParticipant::TxnEntry& txn) {
+        return Substitute("(txn_id=$0: $1, $2)",
+            txn.txn_id, Txn::StateToString(txn.state), txn.commit_timestamp);
+      },
+      ",");
+}
 } // anonymous namespace
 
 class TxnParticipantITest : public KuduTest {
@@ -334,7 +344,10 @@ TEST_F(TxnParticipantElectionStormITest, 
TestFrequentElections) {
     // op only guarantees successful replication on a majority. We need to wait
     // a bit for the state to fully quiesce.
     ASSERT_EVENTUALLY([&] {
-      ASSERT_EQ(expected_txns, 
replicas[i]->tablet()->txn_participant()->GetTxnsForTests());
+      const auto& actual_txns = 
replicas[i]->tablet()->txn_participant()->GetTxnsForTests();
+      ASSERT_EQ(expected_txns, actual_txns)
+          << Substitute("Expected: $0,\nActual: $1",
+                        TxnsAsString(expected_txns), 
TxnsAsString(actual_txns));
     });
   }
 
@@ -346,7 +359,26 @@ TEST_F(TxnParticipantElectionStormITest, 
TestFrequentElections) {
     const auto& tablets = ts->ListTablets();
     scoped_refptr<TabletReplica> r;
     ASSERT_TRUE(ts->server()->tablet_manager()->LookupTablet(tablets[0], &r));
-    ASSERT_EQ(expected_txns, 
r->tablet()->txn_participant()->GetTxnsForTests());
+    ASSERT_OK(r->WaitUntilConsensusRunning(MonoDelta::FromSeconds(10)));
+    auto actual_txns = r->tablet()->txn_participant()->GetTxnsForTests();
+    // Upon bootstrapping, we may end up replaying a REPLICATE message with no
+    // COMMIT message, starting it as a follower op. If it's a BEGIN_TXN op,
+    // this leaves us with an initialized Txn that isn't in the expected set,
+    // as it was completed on a majority. The Txn is benign: either it will be
+    // replicated on a majority and will complete, leaving the Txn as kOpen; or
+    // it doesn't, and the op will be aborted by the next leader, removing the
+    // Txn. Ignore such Txns and just assert the ones we know to have
+    // successfully initialized.
+    vector<TxnParticipant::TxnEntry> actual_txns_not_initting;
+    for (const auto& txn : actual_txns) {
+      if (txn.state != Txn::kInitializing) {
+        actual_txns_not_initting.emplace_back(txn);
+      }
+    }
+    ASSERT_EQ(expected_txns, actual_txns_not_initting)
+        << Substitute("Expected: $0,\nActual: $1",
+                      TxnsAsString(expected_txns),
+                      TxnsAsString(actual_txns_not_initting));
   }
 }
 

Reply via email to