This is an automated email from the ASF dual-hosted git repository.
awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 19c0f7b test: fix flakiness of TestDeletedRowsetGc
19c0f7b is described below
commit 19c0f7b19b623bc91ebcd7b6451300dc45364b0b
Author: Andrew Wong <[email protected]>
AuthorDate: Mon Jul 6 12:49:02 2020 -0700
test: fix flakiness of TestDeletedRowsetGc
The test would sometimes fail (4/100 times in debug mode) with the following
newly added log lines:
W0706 19:48:08.251113 4454 env_posix.cc:1634] Error running callback with file /tmp/dist-test-taskGX39QD/test-tmp/tablet_history_gc-itest.0.TabletHistoryGcITest.TestDeletedRowsetGc.1594064887277463-4454/minicluster-data/ts-0-root/tablet-meta/57ca68c74fa94632b1acffbc547afa4c.kudutmp.308Cez during walk: Not found: /tmp/dist-test-taskGX39QD/test-tmp/tablet_history_gc-itest.0.TabletHistoryGcITest.TestDeletedRowsetGc.1594064887277463-4454/minicluster-data/ts-0-root/tablet-meta/57ca68c74fa
[...]
../../src/kudu/integration-tests/tablet_history_gc-itest.cc:327: Failure
Failed
Bad status: IO error: /tmp/dist-test-taskGX39QD/test-tmp/tablet_history_gc-itest.0.TabletHistoryGcITest.TestDeletedRowsetGc.1594064887277463-4454/minicluster-data/ts-0-root: One or more errors occurred
It seems recursing through the file system isn't a great idea when
maintenance ops are running in the background. This addresses the issue
by asserting eventual success.
With this patch, the test succeeded 1000/1000 times in debug mode.
Change-Id: If2b2583d984c7cca1a747920095e4dbafde9cad9
Reviewed-on: http://gerrit.cloudera.org:8080/16145
Reviewed-by: Alexey Serbin <[email protected]>
Tested-by: Kudu Jenkins
---
.../integration-tests/tablet_history_gc-itest.cc | 6 ++++--
src/kudu/util/env_posix.cc | 21 +++++++++++++--------
2 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/src/kudu/integration-tests/tablet_history_gc-itest.cc b/src/kudu/integration-tests/tablet_history_gc-itest.cc
index c9a7d81..cd72998 100644
--- a/src/kudu/integration-tests/tablet_history_gc-itest.cc
+++ b/src/kudu/integration-tests/tablet_history_gc-itest.cc
@@ -322,8 +322,10 @@ TEST_F(TabletHistoryGcITest, TestDeletedRowsetGc) {
}
ASSERT_OK(session->Flush());
uint64_t measured_size_before_gc;
- ASSERT_OK(Env::Default()->GetFileSizeOnDiskRecursively(cluster_->GetTabletServerFsRoot(0),
- &measured_size_before_gc));
+ ASSERT_EVENTUALLY([&] {
+ ASSERT_OK(Env::Default()->GetFileSizeOnDiskRecursively(cluster_->GetTabletServerFsRoot(0),
+ &measured_size_before_gc));
+ });
// Move forward the clock so our rowsets are all considered ancient.
HybridClock* c = down_cast<HybridClock*>(tablet->clock());
AddTimeToHybridClock(c, MonoDelta::FromSeconds(FLAGS_tablet_history_max_age_sec));
diff --git a/src/kudu/util/env_posix.cc b/src/kudu/util/env_posix.cc
index 2657452..4b1078a 100644
--- a/src/kudu/util/env_posix.cc
+++ b/src/kudu/util/env_posix.cc
@@ -1594,24 +1594,24 @@ class PosixEnv : public Env {
FTSENT* ent = nullptr;
bool had_errors = false;
while ((ent = fts_read(tree.get())) != nullptr) {
- bool doCb = false;
+ bool do_cb = false;
FileType type = DIRECTORY_TYPE;
switch (ent->fts_info) {
case FTS_D: // Directory in pre-order
if (order == PRE_ORDER) {
- doCb = true;
+ do_cb = true;
}
break;
case FTS_DP: // Directory in post-order
if (order == POST_ORDER) {
- doCb = true;
+ do_cb = true;
}
break;
case FTS_F: // A regular file
case FTS_SL: // A symbolic link
case FTS_SLNONE: // A broken symbolic link
case FTS_DEFAULT: // Unknown type of file
- doCb = true;
+ do_cb = true;
type = FILE_TYPE;
break;
@@ -1620,16 +1620,21 @@ class PosixEnv : public Env {
case FTS_NS:
LOG(WARNING) << "Unable to access file " << ent->fts_path
<< " during walk: " << strerror(ent->fts_errno);
+ LOG(WARNING) << Substitute("Unable to access file $0 during walk: $1",
+ ent->fts_path, strerror(ent->fts_errno));
had_errors = true;
break;
default:
- LOG(WARNING) << "Unable to access file " << ent->fts_path
- << " during walk (code " << ent->fts_info << ")";
+ LOG(WARNING) << Substitute("Unable to access file $0 during walk (code $1)",
+ ent->fts_path, ent->fts_info);
break;
}
- if (doCb) {
- if (!cb(type, DirName(ent->fts_path), ent->fts_name).ok()) {
+ if (do_cb) {
+ Status s = cb(type, DirName(ent->fts_path), ent->fts_name);
+ if (PREDICT_FALSE(!s.ok())) {
+ LOG(WARNING) << Substitute("Error running callback with file $0 during walk: $1",
+ ent->fts_path, s.ToString());
had_errors = true;
}
}