This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 9f7c80d  [rebalancer_tool] refactor on IgnoredTserversRunner
9f7c80d is described below

commit 9f7c80d630ebd7a7e2da2edacd910bcbd00f29ee
Author: zhangyifan27 <[email protected]>
AuthorDate: Thu Dec 30 11:44:16 2021 +0800

    [rebalancer_tool] refactor on IgnoredTserversRunner
    
    ClusterInfo::tservers_to_empty is not needed to be populated when
    'BuildClusterInfo' called, we use it in two cases:
    - print replica count about tservers_to_empty.
    - get replicas running on tservers_to_empty (used in IgnoredTserverRunner).
    But 'BuildClusterInfo" could be called in many places.
    
    This patch removed it from ClusterInfo so we could get the information
    only when needed.
    
    Change-Id: I4e51349d12fffb54458142169b9e7789c8d3987c
    Reviewed-on: http://gerrit.cloudera.org:8080/18132
    Tested-by: Alexey Serbin <[email protected]>
    Reviewed-by: Alexey Serbin <[email protected]>
---
 src/kudu/rebalance/rebalance_algo.h |  4 --
 src/kudu/rebalance/rebalancer.cc    | 69 +++++++++++++++++++++++-------
 src/kudu/rebalance/rebalancer.h     | 14 +++++++
 src/kudu/tools/rebalancer_tool.cc   | 83 ++++++++++++++-----------------------
 src/kudu/tools/rebalancer_tool.h    | 19 +++------
 5 files changed, 106 insertions(+), 83 deletions(-)

diff --git a/src/kudu/rebalance/rebalance_algo.h 
b/src/kudu/rebalance/rebalance_algo.h
index b827f0a..67f1267 100644
--- a/src/kudu/rebalance/rebalance_algo.h
+++ b/src/kudu/rebalance/rebalance_algo.h
@@ -88,10 +88,6 @@ struct ClusterInfo {
 
   // Locality information for a cluster.
   ClusterLocalityInfo locality;
-
-  // Mapping tserver identifier --> total replica count on the server.
-  // Replicas on these tablet servers need to move to other tservers in the 
cluster.
-  std::unordered_map<std::string, int> tservers_to_empty;
 };
 
 // A directive to move some replica of a table between two tablet servers.
diff --git a/src/kudu/rebalance/rebalancer.cc b/src/kudu/rebalance/rebalancer.cc
index 9f2e0a2..2d5fdc2 100644
--- a/src/kudu/rebalance/rebalancer.cc
+++ b/src/kudu/rebalance/rebalancer.cc
@@ -321,7 +321,8 @@ Status Rebalancer::BuildClusterInfo(const ClusterRawInfo& 
raw_info,
       skipped_tserver_msgs.emplace_back(
           Substitute("skipping tablet server $0 ($1) because of its "
                      "non-HEALTHY status ($2)",
-                     s.uuid, s.address,
+                     s.uuid,
+                     s.address,
                      ServerHealthToString(s.health)));
       unhealthy_tablet_servers.emplace(s.uuid);
       continue;
@@ -454,25 +455,65 @@ Status Rebalancer::BuildClusterInfo(const ClusterRawInfo& 
raw_info,
     table_info_by_skew.emplace(max_count - min_count, std::move(tbi));
   }
 
-  // Populate ClusterInfo::tservers_to_empty.
-  // TODO(zhangyifan): extract this to a seperate method.
-  if (config_.move_replicas_from_ignored_tservers) {
-    auto& tservers_to_empty = result_info.tservers_to_empty;
-    for (const auto& ignored_tserver : config_.ignored_tservers) {
-      if (ContainsKey(unhealthy_tablet_servers, ignored_tserver)) {
-        continue;
-      }
-      int replica_count = FindWithDefault(tserver_replicas_count, 
ignored_tserver, 0);
-      tservers_to_empty.emplace(ignored_tserver, replica_count);
-    }
-  }
-
   // TODO(aserbin): add sanity checks on the result.
   *info = std::move(result_info);
 
   return Status::OK();
 }
 
+void Rebalancer::GetTServersToEmpty(const ClusterRawInfo& raw_info,
+                                    unordered_set<string>* tservers_to_empty) 
const {
+  DCHECK(tservers_to_empty);
+  tservers_to_empty->clear();
+  for (const auto& ts_summary : raw_info.tserver_summaries) {
+    if (ts_summary.health == ServerHealth::HEALTHY &&
+        ContainsKey(config_.ignored_tservers, ts_summary.uuid)) {
+      tservers_to_empty->emplace(ts_summary.uuid);
+    }
+  }
+}
+
+void Rebalancer::BuildTServersToEmptyInfo(const ClusterRawInfo& raw_info,
+                                          const MovesInProgress& 
moves_in_progress,
+                                          const unordered_set<string>& 
tservers_to_empty,
+                                          TServersToEmptyMap* 
tservers_to_empty_map) {
+  DCHECK(tservers_to_empty_map);
+  tservers_to_empty_map->clear();
+  for (const auto& tablet_summary : raw_info.tablet_summaries) {
+    if (ContainsKey(moves_in_progress, tablet_summary.id)) {
+      continue;
+    }
+    if (tablet_summary.result != cluster_summary::HealthCheckResult::HEALTHY &&
+        tablet_summary.result != 
cluster_summary::HealthCheckResult::RECOVERING) {
+      LOG(INFO) << Substitute(
+          "tablet $0: not considering replicas for movement "
+          "since the tablet's status is '$1'",
+          tablet_summary.id,
+          cluster_summary::HealthCheckResultToString(tablet_summary.result));
+      continue;
+    }
+    TabletInfo tablet_info;
+    for (const auto& replica_info : tablet_summary.replicas) {
+      if (replica_info.is_leader && replica_info.consensus_state) {
+        const auto& cstate = *replica_info.consensus_state;
+        if (cstate.opid_index) {
+          tablet_info.tablet_id = tablet_summary.id;
+          tablet_info.config_idx = *cstate.opid_index;
+          break;
+        }
+      }
+    }
+    for (const auto& replica_info : tablet_summary.replicas) {
+      if (!ContainsKey(tservers_to_empty, replica_info.ts_uuid)) {
+        continue;
+      }
+      auto& tablets =
+          LookupOrEmplace(tservers_to_empty_map, replica_info.ts_uuid, 
vector<TabletInfo>());
+      tablets.emplace_back(tablet_info);
+    }
+  }
+}
+
 void BuildTabletExtraInfoMap(
     const ClusterRawInfo& raw_info,
     std::unordered_map<std::string, TabletExtraInfo>* extra_info_by_tablet_id) 
{
diff --git a/src/kudu/rebalance/rebalancer.h b/src/kudu/rebalance/rebalancer.h
index 31d3364..15cf5bc 100644
--- a/src/kudu/rebalance/rebalancer.h
+++ b/src/kudu/rebalance/rebalancer.h
@@ -193,6 +193,20 @@ class Rebalancer {
                           const MovesInProgress& moves_in_progress,
                           ClusterInfo* info) const;
 
+  struct TabletInfo {
+    std::string tablet_id;
+    boost::optional<int64_t> config_idx;  // For CAS-like change of Raft 
configs.
+  };
+
+  // Mapping tserver UUID to tablets on it.
+  typedef std::unordered_map<std::string, std::vector<TabletInfo>> 
TServersToEmptyMap;
+  void GetTServersToEmpty(const ClusterRawInfo& raw_info,
+                          std::unordered_set<std::string>* tservers_to_empty) 
const;
+  static void BuildTServersToEmptyInfo(const ClusterRawInfo& raw_info,
+                                       const MovesInProgress& 
moves_in_progress,
+                                       const std::unordered_set<std::string>& 
tservers_to_empty,
+                                       TServersToEmptyMap* 
tservers_to_empty_map);
+
  protected:
   // Helper class to find and schedule next available rebalancing move 
operation
   // and track already scheduled ones.
diff --git a/src/kudu/tools/rebalancer_tool.cc 
b/src/kudu/tools/rebalancer_tool.cc
index 45d22b4..b69ca05 100644
--- a/src/kudu/tools/rebalancer_tool.cc
+++ b/src/kudu/tools/rebalancer_tool.cc
@@ -110,8 +110,8 @@ Status RebalancerTool::PrintStats(ostream& out) {
     return Status::OK();
   }
 
-  // Print information about replica count of healthy ignored tservers.
-  RETURN_NOT_OK(PrintIgnoredTserversStats(ci, out));
+  // Print information about tservers need to empty.
+  RETURN_NOT_OK(PrintIgnoredTserversStats(raw_info, out));
 
   if (ts_id_by_location.size() == 1) {
     // That's about printing information about the whole cluster.
@@ -350,16 +350,20 @@ Status RebalancerTool::KsckResultsToClusterRawInfo(const 
boost::optional<string>
   return Status::OK();
 }
 
-Status RebalancerTool::PrintIgnoredTserversStats(const ClusterInfo& ci,
+Status RebalancerTool::PrintIgnoredTserversStats(const ClusterRawInfo& 
raw_info,
                                                  ostream& out) const {
-  const auto& tservers_to_empty = ci.tservers_to_empty;
-  if (tservers_to_empty.empty()) {
+  if (config_.ignored_tservers.empty() || 
!config_.move_replicas_from_ignored_tservers) {
     return Status::OK();
   }
+  unordered_set<string> tservers_to_empty;
+  GetTServersToEmpty(raw_info, &tservers_to_empty);
+  TServersToEmptyMap tservers_to_empty_map;
+  BuildTServersToEmptyInfo(raw_info, MovesInProgress(), tservers_to_empty, 
&tservers_to_empty_map);
   out << "Per-server replica distribution summary for tservers_to_empty:" << 
endl;
   DataTable summary({"Server UUID", "Replica Count"});
-  for (const auto& elem: tservers_to_empty) {
-    summary.AddRow({ elem.first, to_string(elem.second) });
+  for (const auto& ts : tservers_to_empty) {
+    auto* tablets = FindOrNull(tservers_to_empty_map, ts);
+    summary.AddRow({ts, tablets ? to_string(tablets->size()) : "0"});
   }
   RETURN_NOT_OK(summary.PrintTo(out));
   out << endl;
@@ -1376,52 +1380,26 @@ Status 
RebalancerTool::IgnoredTserversRunner::GetReplaceMoves(
     const rebalance::ClusterInfo& ci,
     const rebalance::ClusterRawInfo& raw_info,
     vector<Rebalancer::ReplicaMove>* replica_moves) {
-  RETURN_NOT_OK(CheckIgnoredTServers(raw_info, ci));
+  unordered_set<string> tservers_to_empty;
+  rebalancer_->GetTServersToEmpty(raw_info, &tservers_to_empty);
+  RETURN_NOT_OK(CheckIgnoredTServers(raw_info, ci, tservers_to_empty));
 
-  // Build IgnoredTserversInfo.
-  IgnoredTserversInfo ignored_tservers_info;
-  for (const auto& tablet_summary : raw_info.tablet_summaries) {
-    if (ContainsKey(scheduled_moves_, tablet_summary.id)) {
-      continue;
-    }
-    if (tablet_summary.result != cluster_summary::HealthCheckResult::HEALTHY &&
-        tablet_summary.result != 
cluster_summary::HealthCheckResult::RECOVERING) {
-      LOG(INFO) << Substitute("tablet $0: not considering replicas for 
movement "
-                              "since the tablet's status is '$1'",
-                              tablet_summary.id,
-                              
cluster_summary::HealthCheckResultToString(tablet_summary.result));
-      continue;
-    }
-    TabletInfo tablet_info;
-    for (const auto& replica_info : tablet_summary.replicas) {
-      if (replica_info.is_leader && replica_info.consensus_state) {
-        const auto& cstate = *replica_info.consensus_state;
-        if (cstate.opid_index) {
-          tablet_info.tablet_id = tablet_summary.id;
-          tablet_info.config_idx = *cstate.opid_index;
-          break;
-        }
-      }
-    }
-    for (const auto& replica_info : tablet_summary.replicas) {
-      if (!ContainsKey(ci.tservers_to_empty, replica_info.ts_uuid)) {
-        continue;
-      }
-      auto& tablets = LookupOrEmplace(
-          &ignored_tservers_info, replica_info.ts_uuid, vector<TabletInfo>());
-      tablets.emplace_back(tablet_info);
-    }
-  }
-  GetMovesFromIgnoredTservers(ignored_tservers_info, replica_moves);
+  TServersToEmptyMap tservers_to_empty_map;
+  BuildTServersToEmptyInfo(raw_info, scheduled_moves_, tservers_to_empty, 
&tservers_to_empty_map);
+
+  GetMovesFromIgnoredTservers(tservers_to_empty_map, replica_moves);
   return Status::OK();
 }
 
-Status RebalancerTool::IgnoredTserversRunner::CheckIgnoredTServers(const 
ClusterRawInfo& raw_info,
-                                                                   const 
ClusterInfo& ci) {
-  if (ci.tservers_to_empty.empty()) {
+Status RebalancerTool::IgnoredTserversRunner::CheckIgnoredTServers(
+    const ClusterRawInfo& raw_info,
+    const ClusterInfo& ci,
+    const unordered_set<string>& tservers_to_empty) {
+  if (tservers_to_empty.empty()) {
     return Status::OK();
   }
 
+  // Check whether it is possible to move replicas emptying some tablet 
servers.
   int remaining_tservers_count = 
ci.balance.servers_by_total_replica_count.size();
   int max_replication_factor = 0;
   for (const auto& s : raw_info.table_summaries) {
@@ -1435,18 +1413,19 @@ Status 
RebalancerTool::IgnoredTserversRunner::CheckIgnoredTServers(const Cluster
                    max_replication_factor));
   }
 
-  for (const auto& it : ci.tservers_to_empty) {
-    const string& ts_uuid = it.first;
-    if (!ContainsKey(raw_info.tservers_in_maintenance_mode, ts_uuid)) {
-      return Status::IllegalState(Substitute(
-        "You should set maintenance mode for tablet server $0 first", 
ts_uuid));
+  // Make sure tablet servers that we need to empty are set maintenance mode.
+  for (const auto& ts : tservers_to_empty) {
+    if (!ContainsKey(raw_info.tservers_in_maintenance_mode, ts)) {
+      return Status::IllegalState(
+          Substitute("You should set maintenance mode for tablet server $0 
first", ts));
     }
   }
+
   return Status::OK();
 }
 
 void RebalancerTool::IgnoredTserversRunner::GetMovesFromIgnoredTservers(
-    const IgnoredTserversInfo& ignored_tservers_info,
+    const TServersToEmptyMap& ignored_tservers_info,
     vector<Rebalancer::ReplicaMove>* replica_moves) {
   DCHECK(replica_moves);
   if (ignored_tservers_info.empty()) {
diff --git a/src/kudu/tools/rebalancer_tool.h b/src/kudu/tools/rebalancer_tool.h
index fb601b4..2beda27 100644
--- a/src/kudu/tools/rebalancer_tool.h
+++ b/src/kudu/tools/rebalancer_tool.h
@@ -343,14 +343,6 @@ class RebalancerTool : public rebalance::Rebalancer {
     // Key is tserver UUID which corresponds to value.ts_uuid_from.
     typedef std::unordered_multimap<std::string, Rebalancer::ReplicaMove> 
MovesToSchedule;
 
-    struct TabletInfo {
-      std::string tablet_id;
-      boost::optional<int64_t> config_idx;  // For CAS-like change of Raft 
configs.
-    };
-
-    // Mapping tserver UUID to tablets on it.
-    typedef std::unordered_map<std::string, std::vector<TabletInfo>> 
IgnoredTserversInfo;
-
     // Get replica moves to move replicas from healthy ignored tservers.
     // If returns Status::OK() with replica_moves empty, there would be
     // no replica on the healthy ignored tservers.
@@ -360,10 +352,11 @@ class RebalancerTool : public rebalance::Rebalancer {
 
     // Return Status::OK() if it's safe to move all replicas from the ignored 
to other servers
     // and all tservers that need to empty are in maintenance mode.
-    Status CheckIgnoredTServers(const rebalance::ClusterRawInfo& raw_info,
-                                const rebalance::ClusterInfo& ci);
+    static Status CheckIgnoredTServers(const rebalance::ClusterRawInfo& 
raw_info,
+                                       const rebalance::ClusterInfo& ci,
+                                       const std::unordered_set<std::string>& 
tservers_to_empty);
 
-    void GetMovesFromIgnoredTservers(const IgnoredTserversInfo& 
ignored_tservers_info,
+    void GetMovesFromIgnoredTservers(const TServersToEmptyMap& 
ignored_tservers_info,
                                      std::vector<Rebalancer::ReplicaMove>* 
replica_moves);
 
     // Random device and generator for selecting among multiple choices, when 
appropriate.
@@ -380,8 +373,8 @@ class RebalancerTool : public rebalance::Rebalancer {
                                      const KsckResults& ksck_info,
                                      rebalance::ClusterRawInfo* raw_info);
 
-  // Print replica count infomation on ClusterInfo::tservers_to_empty.
-  Status PrintIgnoredTserversStats(const rebalance::ClusterInfo& ci,
+  // Print replica count infomation about tservers need to empty.
+  Status PrintIgnoredTserversStats(const rebalance::ClusterRawInfo& raw_info,
                                    std::ostream& out) const;
 
   // Print information on the cross-location balance.

Reply via email to