This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 2b9a2012f KUDU-3571: fix flakiness in
AutoIncrementingItest.BootstrapNoWalsNoData
2b9a2012f is described below
commit 2b9a2012f6d7b59931119dfad03e8d40e3031a0e
Author: zhangyifan27 <[email protected]>
AuthorDate: Wed Nov 27 18:11:37 2024 +0800
KUDU-3571: fix flakiness in AutoIncrementingItest.BootstrapNoWalsNoData
The test AutoIncrementingItest.BootstrapNoWalsNoData sometimes failed
because the MVCC timestamp had not been initialized or because of an
inability to wait for in-flight ops to finish. This patch fixes the issue
by waiting for everything to be consistent before scanning.
Before this patch, when running the test in the DEBUG configuration,
6 out of 20 runs failed. After this patch, 20 out of 20 runs succeed.
Change-Id: I5bd387c82b632dbb77aa5a45f831273392ae05b4
Reviewed-on: http://gerrit.cloudera.org:8080/22133
Tested-by: Kudu Jenkins
Reviewed-by: Abhishek Chennaka <[email protected]>
Reviewed-by: Alexey Serbin <[email protected]>
Reviewed-by: Ashwani Raina <[email protected]>
---
src/kudu/integration-tests/auto_incrementing-itest.cc | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/src/kudu/integration-tests/auto_incrementing-itest.cc
b/src/kudu/integration-tests/auto_incrementing-itest.cc
index 4c49bde02..6f78b025f 100644
--- a/src/kudu/integration-tests/auto_incrementing-itest.cc
+++ b/src/kudu/integration-tests/auto_incrementing-itest.cc
@@ -18,6 +18,7 @@
// Integration test for flexible partitioning (eg buckets, range partitioning
// of PK subsets, etc).
+#include <functional>
#include <memory>
#include <ostream>
#include <string>
@@ -38,6 +39,7 @@
#include "kudu/common/wire_protocol.h"
#include "kudu/consensus/metadata.pb.h"
#include "kudu/gutil/strings/substitute.h"
+#include "kudu/integration-tests/cluster_verifier.h"
#include "kudu/mini-cluster/external_mini_cluster.h"
#include "kudu/rpc/rpc_controller.h"
#include "kudu/tablet/tablet.pb.h"
@@ -394,7 +396,6 @@ TEST_F(AutoIncrementingItest, BootstrapWithNoWals) {
}
}
-
TEST_F(AutoIncrementingItest, BootstrapNoWalsNoData) {
string tablet_uuid;
TestSetup(&tablet_uuid);
@@ -437,13 +438,17 @@ TEST_F(AutoIncrementingItest, BootstrapNoWalsNoData) {
cluster_->tablet_server(i)->Shutdown();
ASSERT_OK(cluster_->tablet_server(i)->Restart());
}
+ // Ensure that the tablet is running and leader elected.
+ ASSERT_EVENTUALLY([&] {
ASSERT_OK(ClusterVerifier(cluster_.get()).RunKsck()); });
// Insert new data and verify auto_incrementing_id starts from 1.
ASSERT_OK(InsertData(kNumRows, kNumRows * 2));
+ // Wait for all the replicas to converge.
+ NO_FATALS(ClusterVerifier(cluster_.get()).CheckCluster());
for (int j = 0; j < kNumTabletServers; j++) {
vector<string> results;
ASSERT_OK(ScanTablet(j, tablet_uuid, &results));
- ASSERT_EQ(200, results.size());
+ ASSERT_EQ(kNumRows, results.size());
for (int i = 0; i < results.size(); i++) {
ASSERT_EQ(Substitute("(int32 c0=$0, int64 $1=$2, string
c1=\"string_val\")", i + kNumRows,
Schema::GetAutoIncrementingColumnName(), i + 1),
results[i]);
@@ -503,7 +508,7 @@ TEST_F(AutoIncrementingItest, BootstrapWalsDiverge) {
// Write 200 rows at the rate of 1 row every 5ms which are sent to the
leader replica. After
// 100ms of starting to insert data, we shutdown the followers and at this
point the write
// request is expected to 900ms more. Since the leader would mark the
followers as
- // unavailable after 3 lost hearbeats (1500ms), there will for sure be a
situation where the
+ // unavailable after 3 lost heartbeats (1500ms), there will for sure be a
situation where the
// leader has sent a write op and hasn't gotten the response from majority-1
number of
// followers. In this case the write op is not marked committed in the
leader replica. All
// the writes including this are considered failed.