[master] update placement logic for RF % 2 == 0 Updated the logic of the replica placement in master to properly handle even replication factors. Added corresponding unit tests as well.
It's possible to configure Kudu to allow creation of tables with an even replication factor. I think it's easier to implement the handling of those cases instead of handling possible inconsistencies in the rebalancer and other tools and adding extra paragraphs into release notes. Change-Id: I4259f805090dd350f31bf3bf2b6477898214ece4 Reviewed-on: http://gerrit.cloudera.org:8080/11763 Tested-by: Kudu Jenkins Reviewed-by: Will Berkeley <wdberke...@gmail.com> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/2c5c9d06 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/2c5c9d06 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/2c5c9d06 Branch: refs/heads/master Commit: 2c5c9d06bce6897d78f31178ad6a437d7c48f29b Parents: be64560 Author: Alexey Serbin <aser...@cloudera.com> Authored: Tue Oct 23 17:54:05 2018 -0700 Committer: Alexey Serbin <aser...@cloudera.com> Committed: Mon Oct 29 18:32:12 2018 +0000 ---------------------------------------------------------------------- src/kudu/master/placement_policy-test.cc | 116 ++++++++++++++++++++++++++ src/kudu/master/placement_policy.cc | 21 ++++- src/kudu/master/placement_policy.h | 5 +- 3 files changed, 137 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/2c5c9d06/src/kudu/master/placement_policy-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/placement_policy-test.cc b/src/kudu/master/placement_policy-test.cc index 951bcd0..541d0eb 100644 --- a/src/kudu/master/placement_policy-test.cc +++ b/src/kudu/master/placement_policy-test.cc @@ -769,5 +769,121 @@ TEST_F(PlacementPolicyTest, PlaceExtraTablet_L5_TS16_RF5) { EXPECT_GT(1500, placement_stats["B_ts3"]); } +// Even RF case: edge cases with 2 locations. 
+TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L2_EvenRFEdgeCase) { + const vector<LocationInfo> cluster_info = { + { "A", { { "A_ts0", 10 }, { "A_ts1", 10 }, { "A_ts2", 10 }, } }, + { "B", { { "B_ts0", 0 }, { "B_ts1", 0 }, { "B_ts2", 1 }, { "B_ts3", 10 }, } }, + }; + ASSERT_OK(Prepare(cluster_info)); + + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + + { + static constexpr auto num_replicas = 2; + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result)); + ASSERT_EQ(num_replicas, result.size()); + TSDescriptorsMap m; + ASSERT_OK(TSDescriptorVectorToMap(result, &m)); + ASSERT_EQ(1, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2")); + ASSERT_EQ(1, m.count("B_ts0") + m.count("B_ts1") + m.count("B_ts2")); + } + { + static constexpr auto num_replicas = 4; + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result)); + ASSERT_EQ(num_replicas, result.size()); + TSDescriptorsMap m; + ASSERT_OK(TSDescriptorVectorToMap(result, &m)); + ASSERT_EQ(2, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2")); + ASSERT_EQ(2, m.count("B_ts0") + m.count("B_ts1") + m.count("B_ts2")); + } + { + static constexpr auto num_replicas = 6; + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result)); + ASSERT_EQ(num_replicas, result.size()); + TSDescriptorsMap m; + ASSERT_OK(TSDescriptorVectorToMap(result, &m)); + ASSERT_EQ(1, m.count("A_ts0")); + ASSERT_EQ(1, m.count("A_ts1")); + ASSERT_EQ(1, m.count("A_ts2")); + ASSERT_EQ(1, m.count("B_ts0")); + ASSERT_EQ(1, m.count("B_ts1")); + ASSERT_EQ(1, m.count("B_ts2") + m.count("B_ts3")); + } +} + +// Even RF case: place tablet replicas into a cluster with 3 locations. 
+TEST_F(PlacementPolicyTest, PlaceTabletReplicasEmptyCluster_L3_EvenRF) { + const vector<LocationInfo> cluster_info = { + { "A", { { "A_ts0", 10 }, { "A_ts1", 10 }, { "A_ts2", 10 }, } }, + { "B", { { "B_ts0", 0 }, { "B_ts1", 0 }, { "B_ts2", 10 }, } }, + { "C", { { "C_ts0", 0 }, { "C_ts1", 0 }, { "C_ts2", 10 }, } }, + }; + ASSERT_OK(Prepare(cluster_info)); + + const auto& all = descriptors(); + PlacementPolicy policy(all, rng()); + + { + static constexpr auto num_replicas = 2; + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result)); + ASSERT_EQ(num_replicas, result.size()); + TSDescriptorsMap m; + ASSERT_OK(TSDescriptorVectorToMap(result, &m)); + // Two locations are to have one replica, one location to have none. + ASSERT_GE(1, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2")); + ASSERT_GE(1, m.count("B_ts0") + m.count("B_ts1")); + ASSERT_GE(1, m.count("C_ts0") + m.count("C_ts1")); + } + { + static constexpr auto num_replicas = 4; + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result)); + ASSERT_EQ(num_replicas, result.size()); + TSDescriptorsMap m; + ASSERT_OK(TSDescriptorVectorToMap(result, &m)); + // One location is to have two replicas, the rest are to have just one. 
+ ASSERT_LE(1, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2")); + ASSERT_GE(2, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2")); + ASSERT_LE(1, m.count("B_ts0") + m.count("B_ts1")); + ASSERT_GE(2, m.count("B_ts0") + m.count("B_ts1")); + ASSERT_LE(1, m.count("C_ts0") + m.count("C_ts1")); + ASSERT_GE(2, m.count("C_ts0") + m.count("C_ts1")); + } + { + static constexpr auto num_replicas = 6; + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result)); + ASSERT_EQ(num_replicas, result.size()); + TSDescriptorsMap m; + ASSERT_OK(TSDescriptorVectorToMap(result, &m)); + ASSERT_EQ(2, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2")); + ASSERT_EQ(1, m.count("B_ts0")); + ASSERT_EQ(1, m.count("B_ts1")); + ASSERT_EQ(1, m.count("C_ts0")); + ASSERT_EQ(1, m.count("C_ts1")); + } + { + static constexpr auto num_replicas = 8; + TSDescriptorVector result; + ASSERT_OK(policy.PlaceTabletReplicas(num_replicas, &result)); + ASSERT_EQ(num_replicas, result.size()); + TSDescriptorsMap m; + ASSERT_OK(TSDescriptorVectorToMap(result, &m)); + ASSERT_EQ(2, m.count("A_ts0") + m.count("A_ts1") + m.count("A_ts2")); + ASSERT_EQ(1, m.count("B_ts0")); + ASSERT_EQ(1, m.count("B_ts1")); + ASSERT_EQ(1, m.count("B_ts2")); + ASSERT_EQ(1, m.count("C_ts0")); + ASSERT_EQ(1, m.count("C_ts1")); + ASSERT_EQ(1, m.count("C_ts2")); + } +} + } // namespace master } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/2c5c9d06/src/kudu/master/placement_policy.cc ---------------------------------------------------------------------- diff --git a/src/kudu/master/placement_policy.cc b/src/kudu/master/placement_policy.cc index 45f9ef7..5b0b0d6 100644 --- a/src/kudu/master/placement_policy.cc +++ b/src/kudu/master/placement_policy.cc @@ -299,6 +299,8 @@ Status PlacementPolicy::SelectLocation( string* location) const { DCHECK(location); + const auto num_locations = ltd_.size(); + // A pair of the location-per-load maps. 
The idea is to get a group to select // the best location based on the load, while not placing the majority of // replicas in same location, if possible. Using multimap (but not @@ -317,11 +319,24 @@ // per tablet server. continue; } - if (location_replicas_num + 1 > num_replicas / 2) { + // When placing the replicas of a tablet, it's necessary to take into + // account the number of available locations, since the maximum number + // of replicas per non-overflow location depends on that. For example, + // in case of 2 locations the best placement for 4 replicas would be + // (2 + 2), while in case of 4 and more locations that's (1 + 1 + 1 + 1). + // Similarly, in case of 2 locations and 6 replicas, the best placement + // is (3 + 3), while for 3 locations that's (2 + 2 + 2). + if ((num_locations == 2 && num_replicas % 2 == 0 && + location_replicas_num + 1 > num_replicas / 2) || + (num_locations > 2 && + location_replicas_num + 1 >= (num_replicas + 1) / 2)) { // If possible, avoid placing the majority of the tablet's replicas // into a single location even if load-based criterion would favor that. - // So, if placing one extra replica will add up to the majority, place - // this location into the overflow group. + // Prefer such a distribution of replicas that will keep the majority + // of replicas alive if any single location fails. So, if placing one + // extra replica would add up to the majority in case of odd replication + // factor or add up to the half of all replicas in case of even + // replication factor, place this location into the overflow group. 
location_per_load_overflow.emplace( GetLocationLoad(location, locations_info), location); continue; http://git-wip-us.apache.org/repos/asf/kudu/blob/2c5c9d06/src/kudu/master/placement_policy.h ---------------------------------------------------------------------- diff --git a/src/kudu/master/placement_policy.h b/src/kudu/master/placement_policy.h index b645818..924c0d1 100644 --- a/src/kudu/master/placement_policy.h +++ b/src/kudu/master/placement_policy.h @@ -113,7 +113,7 @@ class PlacementPolicy { const ReplicaLocationsInfo& locations_info) const; // Select locations to place the given number of replicas ('nreplicas') for - // a new tablet. The locations are be chosen according to the placement + // a new tablet. The locations are chosen according to the placement // policies. // // TODO (aserbin): add the reference to the document once it's in the repo. @@ -135,7 +135,8 @@ class PlacementPolicy { // Select location for next replica of a tablet with the specified replication // factor. In essence, the algorithm picks the least loaded location, - // making sure no location contains the majority of the replicas. + // making sure no location contains the majority of replicas of the tablet, + // if possible. // // Parameters: // 'num_replicas' The total number of tablet replicas to place.