This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/branch-1.17.x by this push:
     new 587dbc0af KUDU-3532: Follow up to replica placement bug
587dbc0af is described below

commit 587dbc0af27bd90f199cf3a75cac5d8b4c71bcb2
Author: Mahesh Reddy <mre...@cloudera.com>
AuthorDate: Thu Dec 21 01:17:32 2023 -0500

    KUDU-3532: Follow up to replica placement bug
    
    This patch addresses the feedback left on
    patch [1].
    
    ReservoirSample is refactored to accept an
    unsigned integer rather than a signed integer.
    
    A new test is added to verify the functionality
    of the tiebreaker scenarios of
    PlacementPolicy::SelectReplica().
    
    [1] https://github.com/apache/kudu/commit/a84c8376a
    
    Change-Id: I28582cae538ca27fa26fbfae84460b3b264ddb86
    Reviewed-on: http://gerrit.cloudera.org:8080/20827
    Tested-by: Alexey Serbin <ale...@apache.org>
    Reviewed-by: Alexey Serbin <ale...@apache.org>
    (cherry picked from commit 2810e635cc72aa5279a9dc787b6becad6e0098a9)
    Reviewed-on: http://gerrit.cloudera.org:8080/20861
---
 src/kudu/master/placement_policy-test.cc | 43 ++++++++++++++++++++++++++++++++
 src/kudu/master/placement_policy.cc      | 40 ++++++++++++++++-------------
 src/kudu/util/random_util.h              |  6 ++---
 3 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/src/kudu/master/placement_policy-test.cc 
b/src/kudu/master/placement_policy-test.cc
index 191f81d74..7691e1036 100644
--- a/src/kudu/master/placement_policy-test.cc
+++ b/src/kudu/master/placement_policy-test.cc
@@ -34,6 +34,7 @@
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/master/ts_descriptor.h"
 #include "kudu/util/random.h"
@@ -142,6 +143,15 @@ class PlacementPolicyTest : public ::testing::Test {
     return Status::OK();
   }
 
+  shared_ptr<TSDescriptor> SelectReplica(const TSDescriptorVector& 
source_ts_descs,
+                       const optional<string>& dimension,
+                       const optional<string>& range_key_start,
+                       const optional<string>& table_id,
+                       const std::set<shared_ptr<TSDescriptor>>& excluded) {
+    PlacementPolicy policy(descriptors_, &rng_);
+    return policy.SelectReplica(source_ts_descs, dimension, range_key_start, 
table_id, excluded);
+  }
+
   static Status TSDescriptorVectorToMap(const TSDescriptorVector& v_descs,
                                         TSDescriptorsMap* m_descs) {
     for (const auto& desc : v_descs) {
@@ -1555,5 +1565,38 @@ TEST_F(PlacementPolicyTest, 
PlaceRangeAwareTabletReplicasWithLocations) {
   }
 }
 
+// This test is more granular and specifically tests SelectReplica() and its
+// various tiebreaker scenarios when choosing a tablet server to place a 
replica on.
+TEST_F(PlacementPolicyTest, TestSelectReplicaTieBreakers) {
+  constexpr int nReplicationFactor = 3;
+  TabletNumByRangeMap range({ {"a1", 100} });
+  TabletNumByRangeMap ranges({ {"a1", 100}, {"b1", 100} });
+  const vector<LocationInfo> cluster_info = {
+      {
+          "",
+          {
+              { "ts0", 200, { }, { {"t1", range }, {"t2", range }, } },
+              { "ts1", 100, { }, { {"t1", range }, } },
+              { "ts2", 200, { }, { {"t1", ranges }, } },
+          }
+      },
+  };
+
+  {
+    ASSERT_OK(Prepare(cluster_info));
+    std::set<shared_ptr<TSDescriptor>> selected;
+    // All tablet servers have the same number of replicas per range "a1" of 
table "t1".
+    // "ts1" is chosen first since it has the least number of replicas overall.
+    // "ts0" is chosen next since it has the least number of replicas of table 
"t1".
+    vector<string> chosen_tservers_in_order = {"ts1", "ts0", "ts2"};
+    for (int i = 0; i < nReplicationFactor; ++i) {
+      auto ts = SelectReplica(descriptors(), nullopt, "a1", "t1", selected);
+      ASSERT_TRUE(ts);
+      ASSERT_TRUE(ts->permanent_uuid() == chosen_tservers_in_order[i]) << 
ts->permanent_uuid();
+      EmplaceOrDie(&selected, std::move(ts));
+    }
+  }
+}
+
 } // namespace master
 } // namespace kudu
diff --git a/src/kudu/master/placement_policy.cc 
b/src/kudu/master/placement_policy.cc
index d2d95ea16..d78c67902 100644
--- a/src/kudu/master/placement_policy.cc
+++ b/src/kudu/master/placement_policy.cc
@@ -350,6 +350,12 @@ Status PlacementPolicy::SelectReplicas(const 
TSDescriptorVector& source_ts_descs
 // multiple tablet servers, the total number of replicas will be considered. If
 // there's still a tie, a tablet server will be randomly chosen.
 //
+// When choosing a tablet server to place a replica on from the set of tablet
+// servers "ts_descs", any tablet servers from the set "excluded" that are also
+// in the set "ts_descs" will not be chosen. It's possible that there's no
+// overlap between these two sets as "excluded" can contain tservers from
+// other locations and thus have no relation to the set "ts_descs".
+//
 // The old replica selection algorithm follows the idea from
 // "Power of Two Choices in Randomized Load Balancing"[1]. For each replica,
 // we randomly select two tablet servers, and then assign the replica to the
@@ -381,8 +387,7 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica(
     const set<shared_ptr<TSDescriptor>>& excluded) const {
   if (range_key_start && table_id) {
     TSDescriptorVector ts_choices;
-    const int ts_size = static_cast<int>(ts_descs.size());
-    CHECK_GE(ts_size, 0);
+    const auto ts_size = ts_descs.size();
     ReservoirSample(ts_descs, ts_size, excluded, rng_, &ts_choices);
     DCHECK_LE(ts_choices.size(), ts_size);
     if (ts_choices.size() > 1) {
@@ -393,23 +398,22 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica(
       return ts_choices.front();
     }
     return nullptr;
-  } else {
-    // Pick two random servers, excluding those we've already picked.
-    // If we've only got one server left, 'two_choices' will actually
-    // just contain one element.
-    TSDescriptorVector two_choices;
-    ReservoirSample(ts_descs, 2, excluded, rng_, &two_choices);
-    DCHECK_LE(two_choices.size(), 2);
-
-    if (two_choices.size() == 2) {
-      // Pick the better of the two.
-      return PickBetterTabletServer(two_choices, dimension, rng_);
-    }
-    if (two_choices.size() == 1) {
-      return two_choices.front();
-    }
-    return nullptr;
   }
+  // Pick two random servers, excluding those we've already picked.
+  // If we've only got one server left, 'two_choices' will actually
+  // just contain one element.
+  TSDescriptorVector two_choices;
+  ReservoirSample(ts_descs, 2, excluded, rng_, &two_choices);
+  DCHECK_LE(two_choices.size(), 2);
+
+  if (two_choices.size() == 2) {
+    // Pick the better of the two.
+    return PickBetterTabletServer(two_choices, dimension, rng_);
+  }
+  if (two_choices.size() == 1) {
+    return two_choices.front();
+  }
+  return nullptr;
 }
 
 Status PlacementPolicy::SelectLocation(
diff --git a/src/kudu/util/random_util.h b/src/kudu/util/random_util.h
index abeef3ee1..0e2631910 100644
--- a/src/kudu/util/random_util.h
+++ b/src/kudu/util/random_util.h
@@ -79,11 +79,11 @@ std::vector<T> SelectRandomSubset(const Container& c, int 
min_to_return, Rand* r
 // The results are not stored in a randomized order: the order of results will
 // match their order in the input collection.
 template<class Collection, class Set, class T, typename Rand>
-void ReservoirSample(const Collection& c, int k, const Set& avoid,
+void ReservoirSample(const Collection& c, uint32_t k, const Set& avoid,
                      Rand* r, std::vector<T>* result) {
   result->clear();
   result->reserve(k);
-  int i = 0;
+  uint32_t i = 0;
   for (const T& elem : c) {
     if (ContainsKey(avoid, elem)) {
       continue;
@@ -95,7 +95,7 @@ void ReservoirSample(const Collection& c, int k, const Set& 
avoid,
       continue;
     }
     // Otherwise replace existing elements with decreasing probability.
-    int j = r->Uniform(i);
+    uint32_t j = r->Uniform(i);
     if (j < k) {
       (*result)[j] = elem;
     }

Reply via email to