This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 2810e635c KUDU-3252: Follow up to replica placement bug
2810e635c is described below
commit 2810e635cc72aa5279a9dc787b6becad6e0098a9
Author: Mahesh Reddy <[email protected]>
AuthorDate: Thu Dec 21 01:17:32 2023 -0500
KUDU-3252: Follow up to replica placement bug
This patch addresses the feedback left on
patch [1].
ReservoirSample is refactored to accept an
unsigned integer rather than a signed integer.
A new test is added to verify the functionality
of the tiebreaker scenarios of
PlacementPolicy::SelectReplica().
[1] https://github.com/apache/kudu/commit/a84c8376a
Change-Id: I28582cae538ca27fa26fbfae84460b3b264ddb86
Reviewed-on: http://gerrit.cloudera.org:8080/20827
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/master/placement_policy-test.cc | 43 ++++++++++++++++++++++++++++++++
src/kudu/master/placement_policy.cc | 40 ++++++++++++++++-------------
src/kudu/util/random_util.h | 6 ++---
3 files changed, 68 insertions(+), 21 deletions(-)
diff --git a/src/kudu/master/placement_policy-test.cc
b/src/kudu/master/placement_policy-test.cc
index 191f81d74..7691e1036 100644
--- a/src/kudu/master/placement_policy-test.cc
+++ b/src/kudu/master/placement_policy-test.cc
@@ -34,6 +34,7 @@
#include <glog/logging.h>
#include <gtest/gtest.h>
+#include "kudu/gutil/map-util.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/master/ts_descriptor.h"
#include "kudu/util/random.h"
@@ -142,6 +143,15 @@ class PlacementPolicyTest : public ::testing::Test {
return Status::OK();
}
+ shared_ptr<TSDescriptor> SelectReplica(const TSDescriptorVector&
source_ts_descs,
+ const optional<string>& dimension,
+ const optional<string>& range_key_start,
+ const optional<string>& table_id,
+ const std::set<shared_ptr<TSDescriptor>>& excluded) {
+ PlacementPolicy policy(descriptors_, &rng_);
+ return policy.SelectReplica(source_ts_descs, dimension, range_key_start,
table_id, excluded);
+ }
+
static Status TSDescriptorVectorToMap(const TSDescriptorVector& v_descs,
TSDescriptorsMap* m_descs) {
for (const auto& desc : v_descs) {
@@ -1555,5 +1565,38 @@ TEST_F(PlacementPolicyTest,
PlaceRangeAwareTabletReplicasWithLocations) {
}
}
+// This test is more granular and specifically tests SelectReplica() and its
+// various tiebreaker scenarios when choosing a tablet server to place a
replica on.
+TEST_F(PlacementPolicyTest, TestSelectReplicaTieBreakers) {
+ constexpr int nReplicationFactor = 3;
+ TabletNumByRangeMap range({ {"a1", 100} });
+ TabletNumByRangeMap ranges({ {"a1", 100}, {"b1", 100} });
+ const vector<LocationInfo> cluster_info = {
+ {
+ "",
+ {
+ { "ts0", 200, { }, { {"t1", range }, {"t2", range }, } },
+ { "ts1", 100, { }, { {"t1", range }, } },
+ { "ts2", 200, { }, { {"t1", ranges }, } },
+ }
+ },
+ };
+
+ {
+ ASSERT_OK(Prepare(cluster_info));
+ std::set<shared_ptr<TSDescriptor>> selected;
+ // All tablet servers have the same number of replicas per range "a1" of
table "t1".
+ // "ts1" is chosen first since it has the least number of replicas overall.
+ // "ts0" is chosen next since it has the least number of replicas of table
"t1".
+ vector<string> chosen_tservers_in_order = {"ts1", "ts0", "ts2"};
+ for (int i = 0; i < nReplicationFactor; ++i) {
+ auto ts = SelectReplica(descriptors(), nullopt, "a1", "t1", selected);
+ ASSERT_TRUE(ts);
+ ASSERT_TRUE(ts->permanent_uuid() == chosen_tservers_in_order[i]) <<
ts->permanent_uuid();
+ EmplaceOrDie(&selected, std::move(ts));
+ }
+ }
+}
+
} // namespace master
} // namespace kudu
diff --git a/src/kudu/master/placement_policy.cc
b/src/kudu/master/placement_policy.cc
index d2d95ea16..d78c67902 100644
--- a/src/kudu/master/placement_policy.cc
+++ b/src/kudu/master/placement_policy.cc
@@ -350,6 +350,12 @@ Status PlacementPolicy::SelectReplicas(const
TSDescriptorVector& source_ts_descs
// multiple tablet servers, the total number of replicas will be considered. If
// there's still a tie, a tablet server will be randomly chosen.
//
+// When choosing a tablet server to place a replica on from the set of tablet
+// servers "ts_descs", any tablet servers from the set "excluded" that are also
+// in the set "ts_descs" will not be chosen. It's possible that there's no
+// overlap between these two sets as "excluded" can contain tservers from
+// other locations and thus have no relation to the set "ts_descs".
+//
// The old replica selection algorithm follows the idea from
// "Power of Two Choices in Randomized Load Balancing"[1]. For each replica,
// we randomly select two tablet servers, and then assign the replica to the
@@ -381,8 +387,7 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica(
const set<shared_ptr<TSDescriptor>>& excluded) const {
if (range_key_start && table_id) {
TSDescriptorVector ts_choices;
- const int ts_size = static_cast<int>(ts_descs.size());
- CHECK_GE(ts_size, 0);
+ const auto ts_size = ts_descs.size();
ReservoirSample(ts_descs, ts_size, excluded, rng_, &ts_choices);
DCHECK_LE(ts_choices.size(), ts_size);
if (ts_choices.size() > 1) {
@@ -393,23 +398,22 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica(
return ts_choices.front();
}
return nullptr;
- } else {
- // Pick two random servers, excluding those we've already picked.
- // If we've only got one server left, 'two_choices' will actually
- // just contain one element.
- TSDescriptorVector two_choices;
- ReservoirSample(ts_descs, 2, excluded, rng_, &two_choices);
- DCHECK_LE(two_choices.size(), 2);
-
- if (two_choices.size() == 2) {
- // Pick the better of the two.
- return PickBetterTabletServer(two_choices, dimension, rng_);
- }
- if (two_choices.size() == 1) {
- return two_choices.front();
- }
- return nullptr;
}
+ // Pick two random servers, excluding those we've already picked.
+ // If we've only got one server left, 'two_choices' will actually
+ // just contain one element.
+ TSDescriptorVector two_choices;
+ ReservoirSample(ts_descs, 2, excluded, rng_, &two_choices);
+ DCHECK_LE(two_choices.size(), 2);
+
+ if (two_choices.size() == 2) {
+ // Pick the better of the two.
+ return PickBetterTabletServer(two_choices, dimension, rng_);
+ }
+ if (two_choices.size() == 1) {
+ return two_choices.front();
+ }
+ return nullptr;
}
Status PlacementPolicy::SelectLocation(
diff --git a/src/kudu/util/random_util.h b/src/kudu/util/random_util.h
index abeef3ee1..0e2631910 100644
--- a/src/kudu/util/random_util.h
+++ b/src/kudu/util/random_util.h
@@ -79,11 +79,11 @@ std::vector<T> SelectRandomSubset(const Container& c, int
min_to_return, Rand* r
// The results are not stored in a randomized order: the order of results will
// match their order in the input collection.
template<class Collection, class Set, class T, typename Rand>
-void ReservoirSample(const Collection& c, int k, const Set& avoid,
+void ReservoirSample(const Collection& c, uint32_t k, const Set& avoid,
Rand* r, std::vector<T>* result) {
result->clear();
result->reserve(k);
- int i = 0;
+ uint32_t i = 0;
for (const T& elem : c) {
if (ContainsKey(avoid, elem)) {
continue;
@@ -95,7 +95,7 @@ void ReservoirSample(const Collection& c, int k, const Set&
avoid,
continue;
}
// Otherwise replace existing elements with decreasing probability.
- int j = r->Uniform(i);
+ uint32_t j = r->Uniform(i);
if (j < k) {
(*result)[j] = elem;
}