Repository: hbase Updated Branches: refs/heads/branch-1.2 4160f7273 -> 3f9ba2f24
HBASE-18036 Data locality is not maintained after cluster restart or SSH (Stephen Yuan Jiang) Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/3f9ba2f2 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/3f9ba2f2 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/3f9ba2f2 Branch: refs/heads/branch-1.2 Commit: 3f9ba2f247ef0fb7cebf35a4501bd7cfa36197bc Parents: 4160f72 Author: Stephen Yuan Jiang <[email protected]> Authored: Tue Jun 20 12:34:23 2017 -0700 Committer: Stephen Yuan Jiang <[email protected]> Committed: Tue Jun 20 12:34:56 2017 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/master/ServerManager.java | 8 ++++++ .../master/procedure/ServerCrashProcedure.java | 30 +++++++++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/3f9ba2f2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index f6f2d03..8313604 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -1115,6 +1115,14 @@ public class ServerManager { } /** + * Check whether a server is online based on hostname and port + * @return true if finding a server with matching hostname and port. + */ + public boolean isServerWithSameHostnamePortOnline(final ServerName serverName) { + return findServerWithSameHostnamePortWithLock(serverName) != null; + } + + /** * Check if a server is known to be dead. A server can be online, * or known to be dead, or unknown to this manager (i.e, not online, * not known to be dead either. it is simply not tracked by the http://git-wip-us.apache.org/repos/asf/hbase/blob/3f9ba2f2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index bfe3cc6..2788354 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -23,8 +23,10 @@ import java.io.InterruptedIOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.locks.Lock; @@ -546,14 +548,34 @@ implements ServerProcedureInterface { private boolean assign(final MasterProcedureEnv env, final List<HRegionInfo> hris) throws InterruptedIOException { - AssignmentManager am = env.getMasterServices().getAssignmentManager(); + MasterServices masterServices = env.getMasterServices(); + AssignmentManager am = masterServices.getAssignmentManager(); + // Determine what type of assignment to do if the dead server already restarted. + boolean retainAssignment = + (masterServices.getConfiguration().getBoolean("hbase.master.retain.assignment", true) && + masterServices.getServerManager().isServerWithSameHostnamePortOnline(serverName)) ? + true : false; try { - am.assign(hris); + if (retainAssignment) { + Map<HRegionInfo, ServerName> hriServerMap = + new HashMap<HRegionInfo, ServerName>(hris.size()); + for (HRegionInfo hri: hris) { + hriServerMap.put(hri, serverName); + } + LOG.info("Best effort in SSH to retain assignment of " + hris.size() + + " regions from the dead server " + serverName); + am.assign(hriServerMap); + } else { + LOG.info("Using round robin in SSH to assign " + hris.size() + + " regions from the dead server " + serverName); + am.assign(hris); + } } catch (InterruptedException ie) { - LOG.error("Caught " + ie + " during round-robin assignment"); + LOG.error("Caught " + ie + " during " + (retainAssignment ? "retaining" : "round-robin") + + " assignment"); throw (InterruptedIOException)new InterruptedIOException().initCause(ie); } catch (IOException ioe) { - LOG.info("Caught " + ioe + " during region assignment, will retry"); + LOG.warn("Caught " + ioe + " during region assignment, will retry"); return false; } return true;
