This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new a84c8376a KUDU-3532: Fix range aware replica placement bug
a84c8376a is described below
commit a84c8376a78b3a86be75d434e0f7ff2853c0c880
Author: Mahesh Reddy <[email protected]>
AuthorDate: Tue Dec 12 18:38:57 2023 -0500
KUDU-3532: Fix range aware replica placement bug
An implicit conversion from unsigned long to int caused
an std::length_error to be thrown when a vector tried to
reserve a a size greater than the max size. This happens
when a negative number is converted. This bug was
introduced by changelist [1].
I also added tests with tablet servers in multiple
locations. This omission caused this bug to go
unnoticed until now.
[1] https://github.com/apache/kudu/commit/10fdaf6a9
Change-Id: Id5d696d58965590a9f91f8b1b59f23225bbad8ee
Reviewed-on: http://gerrit.cloudera.org:8080/20781
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/master/placement_policy-test.cc | 219 +++++++++++++++++++++++--------
src/kudu/master/placement_policy.cc | 7 +-
2 files changed, 170 insertions(+), 56 deletions(-)
diff --git a/src/kudu/master/placement_policy-test.cc
b/src/kudu/master/placement_policy-test.cc
index 84449ca7a..191f81d74 100644
--- a/src/kudu/master/placement_policy-test.cc
+++ b/src/kudu/master/placement_policy-test.cc
@@ -1085,6 +1085,17 @@ TEST_F(PlacementPolicyTest,
PlaceTabletReplicasWithRangesExtremeCase) {
ASSERT_EQ(500, stats["ts0"]);
ASSERT_EQ(500, stats["ts1"]);
}
+
+ {
+ ASSERT_OK(Prepare(cluster_info));
+ const auto& all = descriptors();
+ PlacementPolicy policy(all, rng());
+ const auto existing = GetDescriptors({"ts1"});
+ shared_ptr<TSDescriptor> extra_ts;
+ ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1",
&extra_ts));
+ ASSERT_TRUE(extra_ts);
+ ASSERT_TRUE(extra_ts->permanent_uuid() == "ts0") <<
extra_ts->permanent_uuid();
+ }
}
// RF is equal to the number of tablet servers, so replica placement will be
same with ranges.
@@ -1142,6 +1153,18 @@ TEST_F(PlacementPolicyTest,
PlaceTabletReplicasWithRanges) {
ASSERT_EQ(1000, stats["ts1"]);
ASSERT_EQ(1000, stats["ts2"]);
}
+
+ {
+ ASSERT_OK(Prepare(cluster_info));
+ const auto& all = descriptors();
+ PlacementPolicy policy(all, rng());
+ const auto existing = GetDescriptors({"ts2"});
+ shared_ptr<TSDescriptor> extra_ts;
+ ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1",
&extra_ts));
+ ASSERT_TRUE(extra_ts);
+ const auto& ts_uuid = extra_ts->permanent_uuid();
+ ASSERT_TRUE(ts_uuid == "ts0" || ts_uuid == "ts1") <<
extra_ts->permanent_uuid();
+ }
}
// RF is 3 while number of tablet servers is 4 with 2 of them empty and the
other 2 loaded with
@@ -1209,6 +1232,18 @@ TEST_F(PlacementPolicyTest,
PlaceTabletReplicasWithRangesAndTables) {
ASSERT_LE(368, stats["ts3"]);
ASSERT_GE(378, stats["ts3"]);
}
+
+ {
+ ASSERT_OK(Prepare(cluster_info));
+ const auto& all = descriptors();
+ PlacementPolicy policy(all, rng());
+ const auto existing = GetDescriptors({"ts3"});
+ shared_ptr<TSDescriptor> extra_ts;
+ ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1",
&extra_ts));
+ ASSERT_TRUE(extra_ts);
+ const auto& ts_uuid = extra_ts->permanent_uuid();
+ ASSERT_TRUE(ts_uuid == "ts0" || ts_uuid == "ts1") <<
extra_ts->permanent_uuid();
+ }
}
// RF is 3 while number of tablet servers is 6. Half of them are empty while
the other half contain
@@ -1288,6 +1323,20 @@ TEST_F(PlacementPolicyTest,
PlaceTabletReplicasWithRangesAndTablesWithDoubleTSer
ASSERT_LE(137, stats["ts5"]);
ASSERT_GE(147, stats["ts5"]);
}
+
+ {
+ ASSERT_OK(Prepare(cluster_info));
+ const auto& all = descriptors();
+ PlacementPolicy policy(all, rng());
+ const auto existing = GetDescriptors({"ts4", "ts5"});
+ shared_ptr<TSDescriptor> extra_ts;
+ ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1",
&extra_ts));
+ ASSERT_TRUE(extra_ts);
+ const auto& ts_uuid = extra_ts->permanent_uuid();
+ ASSERT_TRUE(ts_uuid == "ts0" ||
+ ts_uuid == "ts1" ||
+ ts_uuid == "ts2") << extra_ts->permanent_uuid();
+ }
}
// With RF = 3, 6 tablet servers, and a well-balanced cluster initially, this
test case adds
@@ -1377,68 +1426,132 @@ TEST_F(PlacementPolicyTest,
PlaceTabletReplicasWithImbalanceByTable) {
},
};
- ASSERT_OK(Prepare(cluster_info));
- const auto& all = descriptors();
- PlacementPolicy policy(all, rng());
- typedef std::unordered_map<string, std::unordered_map<string, int>>
replicas_per_range_per_table;
- std::unordered_map<string, replicas_per_range_per_table> stats;
- vector<string> ranges = {"a1", "b1", "c1", "d1", "e1", "f1"};
- vector<string> tables = {"t1", "t2"};
- for (const auto& table : tables) {
- for (const auto& range : ranges) {
- for (auto i = 0; i < 500; ++i) {
- TSDescriptorVector result;
- ASSERT_OK(policy.PlaceTabletReplicas(nReplicationFactor, nullopt,
range, table, &result));
- ASSERT_EQ(nReplicationFactor, result.size());
- for (const auto& ts : result) {
- const auto& ts_uuid = ts->permanent_uuid();
- ++stats[ts_uuid][table][range];
+ {
+ ASSERT_OK(Prepare(cluster_info));
+ const auto& all = descriptors();
+ PlacementPolicy policy(all, rng());
+ typedef std::unordered_map<string, std::unordered_map<string, int>>
+ replicas_per_range_per_table;
+ std::unordered_map<string, replicas_per_range_per_table> stats;
+ vector<string> ranges = {"a1", "b1", "c1", "d1", "e1", "f1"};
+ vector<string> tables = {"t1", "t2"};
+ for (const auto& table : tables) {
+ for (const auto& range : ranges) {
+ for (auto i = 0; i < 500; ++i) {
+ TSDescriptorVector result;
+ ASSERT_OK(policy.PlaceTabletReplicas(nReplicationFactor, nullopt,
range, table, &result));
+ ASSERT_EQ(nReplicationFactor, result.size());
+ for (const auto& ts : result) {
+ const auto& ts_uuid = ts->permanent_uuid();
+ ++stats[ts_uuid][table][range];
+ }
}
}
}
- }
- ASSERT_EQ(kNumServers, stats.size());
- vector<string> balanced_ranges = {"c1", "d1", "e1", "f1"};
- vector<string> tservers1 = {"ts0", "ts2", "ts4"};
- for (const auto& stat : stats) {
- int tserver_replicas = 0;
- // Verifies that two tables exist on each tserver.
- ASSERT_EQ(2, stat.second.size());
- for (const auto& table : stat.second) {
- // Verifies that the six ranges exist for each table on each tserver.
- ASSERT_EQ(6, table.second.size());
- for (const auto& ranges : table.second) {
- if (std::find(balanced_ranges.begin(),
- balanced_ranges.end(), ranges.first) !=
balanced_ranges.end()) {
- ASSERT_LE(245, ranges.second);
- ASSERT_GE(255, ranges.second);
- } else if (std::find(tservers1.begin(),
- tservers1.end(), stat.first) != tservers1.end()) {
- // Only range of "a1" or "b1" now. Checks which tservers and for
table "t1" or "t2".
- if (table.first == "t1") {
- ASSERT_LE(120, ranges.second);
- ASSERT_GE(130, ranges.second);
- } else {
- ASSERT_LE(370, ranges.second);
- ASSERT_GE(380, ranges.second);
- }
- } else {
- // Ranges "a1" or "b1" for tservers "ts1", "ts3", "ts5". Checks for
table "t1" or "t2".
- if (table.first == "t1") {
- ASSERT_LE(370, ranges.second);
- ASSERT_GE(380, ranges.second);
+ ASSERT_EQ(kNumServers, stats.size());
+ vector<string> balanced_ranges = {"c1", "d1", "e1", "f1"};
+ vector<string> tservers1 = {"ts0", "ts2", "ts4"};
+ for (const auto& stat : stats) {
+ int tserver_replicas = 0;
+ // Verifies that two tables exist on each tserver.
+ ASSERT_EQ(2, stat.second.size());
+ for (const auto& table : stat.second) {
+ // Verifies that the six ranges exist for each table on each tserver.
+ ASSERT_EQ(6, table.second.size());
+ for (const auto& ranges : table.second) {
+ if (std::find(balanced_ranges.begin(),
+ balanced_ranges.end(), ranges.first) !=
balanced_ranges.end()) {
+ ASSERT_LE(245, ranges.second);
+ ASSERT_GE(255, ranges.second);
+ } else if (std::find(tservers1.begin(),
+ tservers1.end(), stat.first) !=
tservers1.end()) {
+ // Only range of "a1" or "b1" now. Checks which tservers and for
table "t1" or "t2".
+ if (table.first == "t1") {
+ ASSERT_LE(120, ranges.second);
+ ASSERT_GE(130, ranges.second);
+ } else {
+ ASSERT_LE(370, ranges.second);
+ ASSERT_GE(380, ranges.second);
+ }
} else {
- ASSERT_LE(120, ranges.second);
- ASSERT_GE(130, ranges.second);
+ // Ranges "a1" or "b1" for tservers "ts1", "ts3", "ts5". Checks
for table "t1" or "t2".
+ if (table.first == "t1") {
+ ASSERT_LE(370, ranges.second);
+ ASSERT_GE(380, ranges.second);
+ } else {
+ ASSERT_LE(120, ranges.second);
+ ASSERT_GE(130, ranges.second);
+ }
}
+ tserver_replicas += ranges.second;
}
- tserver_replicas += ranges.second;
}
+ // Verifies that around 3000 replicas are placed on each tserver.
+ ASSERT_LE(2990, tserver_replicas);
+ ASSERT_GE(3110, tserver_replicas);
}
- // Verifies that around 3000 replicas are placed on each tserver.
- ASSERT_LE(2990, tserver_replicas);
- ASSERT_GE(3110, tserver_replicas);
+ }
+
+ {
+ ASSERT_OK(Prepare(cluster_info));
+ const auto& all = descriptors();
+ PlacementPolicy policy(all, rng());
+ const auto existing = GetDescriptors({"ts0", "ts2", "ts4"});
+ shared_ptr<TSDescriptor> extra_ts;
+ ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1",
&extra_ts));
+ ASSERT_TRUE(extra_ts);
+ const auto& ts_uuid = extra_ts->permanent_uuid();
+ ASSERT_TRUE(ts_uuid == "ts1" ||
+ ts_uuid == "ts3" ||
+ ts_uuid == "ts5") << extra_ts->permanent_uuid();
+ }
+}
+
+TEST_F(PlacementPolicyTest, PlaceRangeAwareTabletReplicasWithLocations) {
+ TabletNumByRangeMap range_a({ {"a1", 2}});
+ TabletNumByRangeMap range_double_a({ {"a1", 4}});
+ TabletNumByRangeMap range_b({ {"b1", 2}});
+ TabletNumByRangeMap range_double_b({ {"b1", 2}});
+ TabletNumByRangeMap ranges_a_and_b({ {"a1", 2}, {"b1", 2} });
+ const vector<LocationInfo> cluster_info = {
+ { "A", { { "A_ts0", 4, { }, { {"t1", range_double_a }, } },
+ { "A_ts1", 4, { }, { {"t1", range_double_a }, } },
+ { "A_ts2", 4, { }, { {"t1", range_double_b }, } }, } },
+ { "B", { { "B_ts0", 4, { }, { {"t1", ranges_a_and_b }, } },
+ { "B_ts1", 4, { }, { {"t1", range_a}, {"t2", range_a} } }, } },
+ { "C", { { "C_ts0", 6, { }, { {"t1", range_a }, {"t2", ranges_a_and_b },
} },
+ { "C_ts1", 4, { }, { {"t1", range_a }, {"t2", range_b} } }, } },
+ };
+
+ // To avoid placing a majority of replicas in one location,
+ // exactly one replica will be placed in each location.
+ // "A_ts2" is chosen because it has the least replicas per range "a1" in
table "t1".
+ // "B_ts1" is chosen because it wins the replicas per table tiebreaker with
"B_ts0".
+ // "C_ts1" is chosen because it wins the total replicas tiebreaker.
+ {
+ ASSERT_OK(Prepare(cluster_info));
+ const auto& all = descriptors();
+ PlacementPolicy policy(all, rng());
+ TSDescriptorVector result;
+ ASSERT_OK(policy.PlaceTabletReplicas(3, nullopt, "a1", "t1", &result));
+ ASSERT_EQ(3, result.size());
+ TSDescriptorsMap m;
+ ASSERT_OK(TSDescriptorVectorToMap(result, &m));
+ ASSERT_EQ(1, m.count("A_ts2"));
+ ASSERT_EQ(1, m.count("B_ts1"));
+ ASSERT_EQ(1, m.count("C_ts1"));
+ }
+
+ {
+ ASSERT_OK(Prepare(cluster_info));
+ const auto& all = descriptors();
+ PlacementPolicy policy(all, rng());
+ const auto existing = GetDescriptors({"A_ts0", "A_ts1", "B_ts0", "B_ts1",
"C_ts0", "C_ts1"});
+ shared_ptr<TSDescriptor> extra_ts;
+ ASSERT_OK(policy.PlaceExtraTabletReplica(existing, nullopt, "a1", "t1",
&extra_ts));
+ ASSERT_TRUE(extra_ts);
+ ASSERT_TRUE(extra_ts->permanent_uuid() == "A_ts2") <<
extra_ts->permanent_uuid();
}
}
diff --git a/src/kudu/master/placement_policy.cc
b/src/kudu/master/placement_policy.cc
index 44c0cb292..d2d95ea16 100644
--- a/src/kudu/master/placement_policy.cc
+++ b/src/kudu/master/placement_policy.cc
@@ -381,9 +381,10 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica(
const set<shared_ptr<TSDescriptor>>& excluded) const {
if (range_key_start && table_id) {
TSDescriptorVector ts_choices;
- auto choices_size = ts_descs.size() - excluded.size();
- ReservoirSample(ts_descs, choices_size, excluded, rng_, &ts_choices);
- DCHECK_EQ(ts_choices.size(), choices_size);
+ const int ts_size = static_cast<int>(ts_descs.size());
+ CHECK_GE(ts_size, 0);
+ ReservoirSample(ts_descs, ts_size, excluded, rng_, &ts_choices);
+ DCHECK_LE(ts_choices.size(), ts_size);
if (ts_choices.size() > 1) {
return PickTabletServer(
ts_choices, range_key_start.value(), table_id.value(), dimension,
rng_);