This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 8a30baea1a89749d9bd7aab90ac34db031094dd3 Author: triplesheep <[email protected]> AuthorDate: Fri Nov 22 15:04:25 2019 +0800 KUDU-3008 Spread replicas evenly with 2 locations and odd replica factors In case of distributing 3 replicas among 2 locations, 1 + 2 is better than 3 + 0 because if all servers in the first location become unavailable, in the former case the tablet is still available, while in the latter it's not. Also, 1 + 2 is better than 0 + 3 because in case of catastrophic non-recoverable failure of the second location, no replica survives and all data is lost in the latter case, while in the former case there will be 1 replica and it may be used for manual data recovery. Change-Id: If96361f6be686679dd72f08e28d964eae25875d0 Reviewed-on: http://gerrit.cloudera.org:8080/14783 Tested-by: Kudu Jenkins Reviewed-by: Alexey Serbin <[email protected]> --- src/kudu/master/placement_policy-test.cc | 36 ++++++++++++++++++++++++++++++++ src/kudu/master/placement_policy.cc | 15 ++++++++----- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/src/kudu/master/placement_policy-test.cc b/src/kudu/master/placement_policy-test.cc index caf16d3..f1569c6 100644 --- a/src/kudu/master/placement_policy-test.cc +++ b/src/kudu/master/placement_policy-test.cc @@ -843,6 +843,42 @@ TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L2_EvenRFEdgeCase) { } } +// Odd RF case: edge cases with 2 locaitons. +// Make sure replicas are placed into both locations, even if ideal density +// distribution would have them in a single location. +TEST_F(PlacementPolicyTest, PlaceTabletReplicasCluster_L2_OddRFEdgeCase) { + const vector<LocationInfo> cluster_info = { + { "A", { { "A_ts0", 10 }, { "A_ts1", 10 }, { "A_ts2", 10 }, } }, + { "B", { { "B_ts0", 9 }, { "B_ts1", 9 }, { "B_ts2", 9 }, } }, + }; + ASSERT_OK(Prepare(cluster_info)); + + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + + { + static constexpr auto num_replicas = 3; + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result)); + ASSERT_EQ(num_replicas, result.size()); + TSDescriptorsMap m; + ASSERT_OK(TSDescriptorVectorToMap(result, &m)); + ASSERT_EQ(1, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2")); + ASSERT_EQ(2, m.count("B_ts0") + m.count("B_ts1") + m.count("B_ts2")); + } + + { + static constexpr auto num_replicas = 5; + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, none, &result)); + ASSERT_EQ(num_replicas, result.size()); + TSDescriptorsMap m; + ASSERT_OK(TSDescriptorVectorToMap(result, &m)); + ASSERT_EQ(2, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2")); + ASSERT_EQ(3, m.count("B_ts0") + m.count("B_ts1") + m.count("B_ts2")); + } +} + // Even RF case: place tablet replicas into a cluster with 3 locations. TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L3_EvenRF) { const vector<LocationInfo> cluster_info = { diff --git a/src/kudu/master/placement_policy.cc b/src/kudu/master/placement_policy.cc index c81a8ce..4b78076 100644 --- a/src/kudu/master/placement_policy.cc +++ b/src/kudu/master/placement_policy.cc @@ -334,11 +334,16 @@ Status PlacementPolicy::SelectLocation( // in case of 2 locations the best placement for 4 replicas would be // (2 + 2), while in case of 4 and more locations that's (1 + 1 + 1 + 1). // Similarly, in case of 2 locations and 6 replicas, the best placement - // is (3 + 3), while for 3 locations that's (2 + 2 + 2). - if ((num_locations == 2 && num_replicas % 2 == 0 && - location_replicas_num + 1 > num_replicas / 2) || - (num_locations > 2 && - location_replicas_num + 1 >= (num_replicas + 1) / 2)) { + // is (3 + 3), while for 3 locations that's (2 + 2 + 2). In case of + // distributing 3 replicas among 2 locations, 1 + 2 is better than 3 + 0 + // because if all servers in the first location become unavailable, in the + // former case the tablet is still available, while in the latter it's not. + // Also, 1 + 2 is better than 0 + 3 because in case of catastrophic + // non-recoverable failure of the second location, no replica survives and + // all data is lost in the latter case, while in the former case there will + // be 1 replica and it may be used for manual data recovery. + if ((num_locations == 2 && location_replicas_num + 1 > num_replicas / 2) || + (num_locations > 2 && location_replicas_num + 1 >= (num_replicas + 1) / 2)) { // If possible, avoid placing the majority of the tablet's replicas // into a single location even if load-based criterion would favor that. // Prefer such a distribution of replicas that will keep the majority
