This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 396e412 tablet_server-test: deflake ScanCorruptedDeltasParamTest
396e412 is described below
commit 396e41280bc4eb5a1282db9e500feaadd09d5c4f
Author: Adar Dembo <[email protected]>
AuthorDate: Fri Oct 4 16:54:20 2019 -0700
tablet_server-test: deflake ScanCorruptedDeltasParamTest
Saw this test failure in a precommit run, though I could only reproduce it in
1 out of 10000 runs using dist-test, so it must be pretty rare. Normally scans
on a failed tablet will
yield the error already expected in the test, but tablet failure is
asynchronous, so if we're doing snapshot scans, it's possible for the next
scan to see a running tablet, start the scan, and fail within
WaitForSnapshotWithAllCommitted because the async failure process hasn't yet
marked the entire tablet as failed.
/home/jenkins-slave/workspace/kudu-master/1/src/kudu/tserver/tablet_server-test.cc:1840:
Failure
Expected: resp.error().status().code()
Which is: 11
To be equal to: AppStatusPB::ILLEGAL_STATE
Which is: 9
Google Test trace:
/home/jenkins-slave/workspace/kudu-master/1/src/kudu/tserver/tablet_server-test.cc:1839:
error {
code: TABLET_FAILED
status {
code: ABORTED
message: "MVCC is closed"
}
}
Change-Id: I51012fa5912e4ce6a2e8f1d370ac387df5bd6db6
Reviewed-on: http://gerrit.cloudera.org:8080/14375
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/tserver/tablet_server-test.cc | 35 +++++++++++++++++++++++++---------
1 file changed, 26 insertions(+), 9 deletions(-)
diff --git a/src/kudu/tserver/tablet_server-test.cc b/src/kudu/tserver/tablet_server-test.cc
index a7bdecd..3a3f080 100644
--- a/src/kudu/tserver/tablet_server-test.cc
+++ b/src/kudu/tserver/tablet_server-test.cc
@@ -1821,13 +1821,24 @@ TEST_P(ScanCorruptedDeltasParamTest, Test) {
// Send the call. This first call should attempt to init the corrupted
// deltafiles and return with an error. Subsequent calls should see that the
// previous call to init failed and should return an appropriate error.
- req.set_batch_size_bytes(10000);
- SCOPED_TRACE(SecureDebugString(req));
- ASSERT_OK(proxy_->Scan(req, &resp, &rpc));
- SCOPED_TRACE(SecureDebugString(resp));
- ASSERT_TRUE(resp.has_error());
- ASSERT_EQ(resp.error().status().code(), AppStatusPB::CORRUPTION);
- ASSERT_STR_CONTAINS(resp.error().status().message(), "failed to init CFileReader");
+ //
+ // It's possible for snapshot scans to be waiting in MVCC when the tablet
+ // fails. If that happens, the error will be slightly different.
+ {
+ req.set_batch_size_bytes(10000);
+ SCOPED_TRACE(SecureDebugString(req));
+ ASSERT_OK(proxy_->Scan(req, &resp, &rpc));
+ SCOPED_TRACE(SecureDebugString(resp));
+ ASSERT_TRUE(resp.has_error());
+ const auto& s = resp.error().status();
+ if (s.code() == AppStatusPB::CORRUPTION) {
+ ASSERT_STR_CONTAINS(s.message(), "failed to init CFileReader");
+ } else if (s.code() == AppStatusPB::ABORTED) {
+ ASSERT_STR_CONTAINS(s.message(), "MVCC is closed");
+ } else {
+ FAIL() << "Unexpected failure";
+ }
+ }
// The tablet will end up transitioning to a failed state and yield "not
// running" errors.
@@ -1837,8 +1848,14 @@ TEST_P(ScanCorruptedDeltasParamTest, Test) {
SCOPED_TRACE(SecureDebugString(req));
ASSERT_TRUE(resp.has_error());
SCOPED_TRACE(SecureDebugString(resp));
- ASSERT_EQ(resp.error().status().code(), AppStatusPB::ILLEGAL_STATE);
- ASSERT_STR_CONTAINS(resp.error().status().message(), "Tablet not RUNNING");
+ const auto& s = resp.error().status();
+ if (s.code() == AppStatusPB::ILLEGAL_STATE) {
+ ASSERT_STR_CONTAINS(s.message(), "Tablet not RUNNING");
+ } else if (s.code() == AppStatusPB::ABORTED) {
+ ASSERT_STR_CONTAINS(s.message(), "MVCC is closed");
+ } else {
+ FAIL() << "Unexpected failure";
+ }
}
}