This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch branch-1.11.x in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 0afd48c1d602fc83b91890d3fb3c8c7aca3d45f9 Author: Adar Dembo <[email protected]> AuthorDate: Wed Oct 16 12:15:56 2019 -0700 kudu-tool-test: deflake TestFsAddRemoveDataDirEndToEnd This test was made somewhat flaky by KUDU-2901. The reason is subtle: if any unrelated IO (such as from a concurrently running test) interleaves with the creation of the second table, it's possible for the new data directory to be measured with less available space than the existing ones. The existing LBM containers have all been preallocated by this point so the test never remeasures available space. Thus, when the second table flushes, the KUDU-2901 heuristic ensures that all new data blocks land on the existing data directories rather than the new one. The fix is simple: a feature flag for this feature (which we should have had anyway), which we disable for the test. Change-Id: Ifb92759318c12d791d26eb8fb6b77914c284f4cb Reviewed-on: http://gerrit.cloudera.org:8080/14462 Reviewed-by: Andrew Wong <[email protected]> Tested-by: Adar Dembo <[email protected]> (cherry picked from commit 7e8f37f8ddf46b965b0af4eba3f31c02b11b8fba) Reviewed-on: http://gerrit.cloudera.org:8080/14469 Tested-by: Kudu Jenkins Reviewed-by: Grant Henke <[email protected]> --- src/kudu/fs/data_dirs.cc | 17 +++++++++++++---- src/kudu/tools/kudu-tool-test.cc | 6 ++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/kudu/fs/data_dirs.cc b/src/kudu/fs/data_dirs.cc index c5205ab..5812e1d 100644 --- a/src/kudu/fs/data_dirs.cc +++ b/src/kudu/fs/data_dirs.cc @@ -95,6 +95,12 @@ DEFINE_bool(fs_lock_data_dirs, true, TAG_FLAG(fs_lock_data_dirs, unsafe); TAG_FLAG(fs_lock_data_dirs, evolving); +DEFINE_bool(fs_data_dirs_consider_available_space, true, + "Whether to consider available space when selecting a data " + "directory during tablet or data block creation."); +TAG_FLAG(fs_data_dirs_consider_available_space, runtime); +TAG_FLAG(fs_data_dirs_consider_available_space, evolving); + METRIC_DEFINE_gauge_uint64(server, data_dirs_failed, "Data Directories Failed", kudu::MetricUnit::kDataDirectories, @@ -1011,9 +1017,11 @@ Status DataDirManager::GetDirForBlock(const CreateBlockOptions& opts, DataDir** return Status::OK(); } // Pick two randomly and select the one with more space. - shuffle(candidate_dirs.begin(), candidate_dirs.end(), default_random_engine(rng_.Next())); - *dir = (candidate_dirs[0]->available_bytes() > candidate_dirs[1]->available_bytes()) ? - candidate_dirs[0] : candidate_dirs[1]; + shuffle(candidate_dirs.begin(), candidate_dirs.end(), + default_random_engine(rng_.Next())); + *dir = PREDICT_TRUE(FLAGS_fs_data_dirs_consider_available_space) && + candidate_dirs[0]->available_bytes() > candidate_dirs[1]->available_bytes() ? + candidate_dirs[0] : candidate_dirs[1]; return Status::OK(); } @@ -1130,7 +1138,8 @@ void DataDirManager::GetDirsForGroupUnlocked(int target_size, int tablets_in_first = FindOrDie(tablets_by_uuid_idx_map_, candidate_indices[0]).size(); int tablets_in_second = FindOrDie(tablets_by_uuid_idx_map_, candidate_indices[1]).size(); int selected_index = 0; - if (tablets_in_first == tablets_in_second) { + if (tablets_in_first == tablets_in_second && + PREDICT_TRUE(FLAGS_fs_data_dirs_consider_available_space)) { int64_t space_in_first = FindOrDie(data_dir_by_uuid_idx_, candidate_indices[0])->available_bytes(); int64_t space_in_second = FindOrDie(data_dir_by_uuid_idx_, diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc index d2c1718..f122e78 100644 --- a/src/kudu/tools/kudu-tool-test.cc +++ b/src/kudu/tools/kudu-tool-test.cc @@ -136,6 +136,7 @@ #include "kudu/util/test_util.h" #include "kudu/util/url-coding.h" +DECLARE_bool(fs_data_dirs_consider_available_space); DECLARE_bool(hive_metastore_sasl_enabled); DECLARE_bool(show_values); DECLARE_bool(show_attributes); @@ -4943,6 +4944,11 @@ TEST_F(ToolTest, TestFsAddRemoveDataDirEndToEnd) { return; } + // Disable the available space heuristic as it can interfere with the desired + // test invariant below (that the newly created table write to the newly added + // data directory). + FLAGS_fs_data_dirs_consider_available_space = false; + // Start a cluster whose tserver has multiple data directories. InternalMiniClusterOptions opts; opts.num_data_dirs = 2;
