[09/50] [abbrv] hbase git commit: HBASE-20137 TestRSGroups is flakey
HBASE-20137 TestRSGroups is flakey On failed RPC we expire the server and suspend expecting the resultant ServerCrashProcedure to wake us back up again. In tests, TestRSGroups hung because it failed to schedule a server expiration because the server was already expired and undergoing processing (the test was shutting down). Deal with this case by having expireServer return false if unable to expire. Callers will then know whether a ServerCrashProcedure has been scheduled or not. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Have expireServer return true if successful. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java The log that included an exception whose message was the current procedure as a String totally baffled me. Make it more obvious what the exception is. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java If we fail to expire a server, wake our procedure -- do not suspend -- and presume ok to move region to CLOSED state (because going down or concurrent crashed server processing ongoing). 
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6d1740d4 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6d1740d4 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6d1740d4 Branch: refs/heads/HBASE-19397-branch-2 Commit: 6d1740d498d3f0f301a87a0a0cd598827790efa5 Parents: b59c39d Author: Michael Stack Authored: Mon Mar 5 21:20:23 2018 -0800 Committer: Michael Stack Committed: Mon Mar 5 22:53:27 2018 -0800 -- .../hadoop/hbase/master/ServerManager.java | 17 --- .../FailedRemoteDispatchException.java | 9 +++- .../assignment/RegionTransitionProcedure.java | 15 --- .../master/assignment/UnassignProcedure.java| 47 .../assignment/TestAssignmentManager.java | 10 - 5 files changed, 64 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hbase/blob/6d1740d4/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java -- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 06d6c8b..e2f0b6b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -555,15 +555,17 @@ public class ServerManager { } /* - * Expire the passed server. Add it to list of dead servers and queue a - * shutdown processing. + * Expire the passed server. Add it to list of dead servers and queue a shutdown processing. + * @return True if we expired passed serverName else false if we failed to schedule + * an expire (and attendant ServerCrashProcedure -- some clients are dependent on + * server crash procedure being queued and need to know if has not been queued). 
*/ - public synchronized void expireServer(final ServerName serverName) { + public synchronized boolean expireServer(final ServerName serverName) { if (serverName.equals(master.getServerName())) { if (!(master.isAborted() || master.isStopped())) { master.stop("We lost our znode?"); } - return; + return false; } if (!master.isServerCrashProcessingEnabled()) { LOG.info("Master doesn't enable ServerShutdownHandler during initialization, " @@ -573,13 +575,13 @@ public class ServerManager { // the SCP is not enable yet and Meta's RIT may be suspend forever. See HBase-19287 master.getAssignmentManager().handleMetaRITOnCrashedServer(serverName); this.queuedDeadServers.add(serverName); - return; + return false; } if (this.deadservers.isDeadServer(serverName)) { // TODO: Can this happen? It shouldn't be online in this case? LOG.warn("Expiration of " + serverName + " but server shutdown already in progress"); - return; + return false; } moveFromOnlineToDeadServers(serverName); @@ -591,7 +593,7 @@ public class ServerManager { if (this.onlineServers.isEmpty()) { master.stop("Cluster shutdown set; onlineServer=0"); } - return; + return false; } LOG.info("Processing expiration of " + serverName + " on " + this.master.getServerName()); master.getAssignmentManager().submitServerCrash(serverName, true); @@ -602,6 +604,7 @@ public class ServerManager { listener.serverRemoved(serverName); } } +return true; } @VisibleForTesting http://git-wip-us.apache.org/repos/asf/hbase/blob/6d1740d4/hbase-server/src/mai
[18/33] hbase git commit: HBASE-20137 TestRSGroups is flakey
HBASE-20137 TestRSGroups is flakey On failed RPC we expire the server and suspend expecting the resultant ServerCrashProcedure to wake us back up again. In tests, TestRSGroups hung because it failed to schedule a server expiration because the server was already expired and undergoing processing (the test was shutting down). Deal with this case by having expireServer return false if unable to expire. Callers will then know whether a ServerCrashProcedure has been scheduled or not. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Have expireServer return true if successful. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java The log that included an exception whose message was the current procedure as a String totally baffled me. Make it more obvious what the exception is. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java If we fail to expire a server, wake our procedure -- do not suspend -- and presume ok to move region to CLOSED state (because going down or concurrent crashed server processing ongoing). 
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1f5e93a8 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1f5e93a8 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1f5e93a8 Branch: refs/heads/HBASE-19064 Commit: 1f5e93a8f85c702f65c27c6162cd10c2035e481d Parents: 7889df3 Author: Michael Stack Authored: Mon Mar 5 21:20:23 2018 -0800 Committer: Michael Stack Committed: Tue Mar 6 10:55:40 2018 -0800 -- .../hadoop/hbase/master/ServerManager.java | 17 --- .../FailedRemoteDispatchException.java | 9 +++- .../assignment/RegionTransitionProcedure.java | 15 --- .../master/assignment/UnassignProcedure.java| 47 .../assignment/TestAssignmentManager.java | 10 - 5 files changed, 64 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hbase/blob/1f5e93a8/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java -- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 06d6c8b..e2f0b6b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -555,15 +555,17 @@ public class ServerManager { } /* - * Expire the passed server. Add it to list of dead servers and queue a - * shutdown processing. + * Expire the passed server. Add it to list of dead servers and queue a shutdown processing. + * @return True if we expired passed serverName else false if we failed to schedule + * an expire (and attendant ServerCrashProcedure -- some clients are dependent on + * server crash procedure being queued and need to know if has not been queued). 
*/ - public synchronized void expireServer(final ServerName serverName) { + public synchronized boolean expireServer(final ServerName serverName) { if (serverName.equals(master.getServerName())) { if (!(master.isAborted() || master.isStopped())) { master.stop("We lost our znode?"); } - return; + return false; } if (!master.isServerCrashProcessingEnabled()) { LOG.info("Master doesn't enable ServerShutdownHandler during initialization, " @@ -573,13 +575,13 @@ public class ServerManager { // the SCP is not enable yet and Meta's RIT may be suspend forever. See HBase-19287 master.getAssignmentManager().handleMetaRITOnCrashedServer(serverName); this.queuedDeadServers.add(serverName); - return; + return false; } if (this.deadservers.isDeadServer(serverName)) { // TODO: Can this happen? It shouldn't be online in this case? LOG.warn("Expiration of " + serverName + " but server shutdown already in progress"); - return; + return false; } moveFromOnlineToDeadServers(serverName); @@ -591,7 +593,7 @@ public class ServerManager { if (this.onlineServers.isEmpty()) { master.stop("Cluster shutdown set; onlineServer=0"); } - return; + return false; } LOG.info("Processing expiration of " + serverName + " on " + this.master.getServerName()); master.getAssignmentManager().submitServerCrash(serverName, true); @@ -602,6 +604,7 @@ public class ServerManager { listener.serverRemoved(serverName); } } +return true; } @VisibleForTesting http://git-wip-us.apache.org/repos/asf/hbase/blob/1f5e93a8/hbase-server/src/main/java/or
hbase git commit: HBASE-20137 TestRSGroups is flakey
Repository: hbase Updated Branches: refs/heads/branch-2 b59c39d94 -> 6d1740d49 HBASE-20137 TestRSGroups is flakey On failed RPC we expire the server and suspend expecting the resultant ServerCrashProcedure to wake us back up again. In tests, TestRSGroups hung because it failed to schedule a server expiration because the server was already expired and undergoing processing (the test was shutting down). Deal with this case by having expireServer return false if unable to expire. Callers will then know whether a ServerCrashProcedure has been scheduled or not. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Have expireServer return true if successful. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java The log that included an exception whose message was the current procedure as a String totally baffled me. Make it more obvious what the exception is. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java If we fail to expire a server, wake our procedure -- do not suspend -- and presume ok to move region to CLOSED state (because going down or concurrent crashed server processing ongoing). 
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6d1740d4 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6d1740d4 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6d1740d4 Branch: refs/heads/branch-2 Commit: 6d1740d498d3f0f301a87a0a0cd598827790efa5 Parents: b59c39d Author: Michael Stack Authored: Mon Mar 5 21:20:23 2018 -0800 Committer: Michael Stack Committed: Mon Mar 5 22:53:27 2018 -0800 -- .../hadoop/hbase/master/ServerManager.java | 17 --- .../FailedRemoteDispatchException.java | 9 +++- .../assignment/RegionTransitionProcedure.java | 15 --- .../master/assignment/UnassignProcedure.java| 47 .../assignment/TestAssignmentManager.java | 10 - 5 files changed, 64 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hbase/blob/6d1740d4/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java -- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 06d6c8b..e2f0b6b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -555,15 +555,17 @@ public class ServerManager { } /* - * Expire the passed server. Add it to list of dead servers and queue a - * shutdown processing. + * Expire the passed server. Add it to list of dead servers and queue a shutdown processing. + * @return True if we expired passed serverName else false if we failed to schedule + * an expire (and attendant ServerCrashProcedure -- some clients are dependent on + * server crash procedure being queued and need to know if has not been queued). 
*/ - public synchronized void expireServer(final ServerName serverName) { + public synchronized boolean expireServer(final ServerName serverName) { if (serverName.equals(master.getServerName())) { if (!(master.isAborted() || master.isStopped())) { master.stop("We lost our znode?"); } - return; + return false; } if (!master.isServerCrashProcessingEnabled()) { LOG.info("Master doesn't enable ServerShutdownHandler during initialization, " @@ -573,13 +575,13 @@ public class ServerManager { // the SCP is not enable yet and Meta's RIT may be suspend forever. See HBase-19287 master.getAssignmentManager().handleMetaRITOnCrashedServer(serverName); this.queuedDeadServers.add(serverName); - return; + return false; } if (this.deadservers.isDeadServer(serverName)) { // TODO: Can this happen? It shouldn't be online in this case? LOG.warn("Expiration of " + serverName + " but server shutdown already in progress"); - return; + return false; } moveFromOnlineToDeadServers(serverName); @@ -591,7 +593,7 @@ public class ServerManager { if (this.onlineServers.isEmpty()) { master.stop("Cluster shutdown set; onlineServer=0"); } - return; + return false; } LOG.info("Processing expiration of " + serverName + " on " + this.master.getServerName()); master.getAssignmentManager().submitServerCrash(serverName, true); @@ -602,6 +604,7 @@ public class ServerManager { listener.serverRemoved(serverName); } } +return true; } @VisibleForTesting http://g
hbase git commit: HBASE-20137 TestRSGroups is flakey
Repository: hbase Updated Branches: refs/heads/master 7889df371 -> 1f5e93a8f HBASE-20137 TestRSGroups is flakey On failed RPC we expire the server and suspend expecting the resultant ServerCrashProcedure to wake us back up again. In tests, TestRSGroups hung because it failed to schedule a server expiration because the server was already expired and undergoing processing (the test was shutting down). Deal with this case by having expireServer return false if unable to expire. Callers will then know whether a ServerCrashProcedure has been scheduled or not. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Have expireServer return true if successful. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java The log that included an exception whose message was the current procedure as a String totally baffled me. Make it more obvious what the exception is. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java If we fail to expire a server, wake our procedure -- do not suspend -- and presume ok to move region to CLOSED state (because going down or concurrent crashed server processing ongoing). 
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1f5e93a8 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1f5e93a8 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1f5e93a8 Branch: refs/heads/master Commit: 1f5e93a8f85c702f65c27c6162cd10c2035e481d Parents: 7889df3 Author: Michael Stack Authored: Mon Mar 5 21:20:23 2018 -0800 Committer: Michael Stack Committed: Tue Mar 6 10:55:40 2018 -0800 -- .../hadoop/hbase/master/ServerManager.java | 17 --- .../FailedRemoteDispatchException.java | 9 +++- .../assignment/RegionTransitionProcedure.java | 15 --- .../master/assignment/UnassignProcedure.java| 47 .../assignment/TestAssignmentManager.java | 10 - 5 files changed, 64 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hbase/blob/1f5e93a8/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java -- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 06d6c8b..e2f0b6b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -555,15 +555,17 @@ public class ServerManager { } /* - * Expire the passed server. Add it to list of dead servers and queue a - * shutdown processing. + * Expire the passed server. Add it to list of dead servers and queue a shutdown processing. + * @return True if we expired passed serverName else false if we failed to schedule + * an expire (and attendant ServerCrashProcedure -- some clients are dependent on + * server crash procedure being queued and need to know if has not been queued). 
*/ - public synchronized void expireServer(final ServerName serverName) { + public synchronized boolean expireServer(final ServerName serverName) { if (serverName.equals(master.getServerName())) { if (!(master.isAborted() || master.isStopped())) { master.stop("We lost our znode?"); } - return; + return false; } if (!master.isServerCrashProcessingEnabled()) { LOG.info("Master doesn't enable ServerShutdownHandler during initialization, " @@ -573,13 +575,13 @@ public class ServerManager { // the SCP is not enable yet and Meta's RIT may be suspend forever. See HBase-19287 master.getAssignmentManager().handleMetaRITOnCrashedServer(serverName); this.queuedDeadServers.add(serverName); - return; + return false; } if (this.deadservers.isDeadServer(serverName)) { // TODO: Can this happen? It shouldn't be online in this case? LOG.warn("Expiration of " + serverName + " but server shutdown already in progress"); - return; + return false; } moveFromOnlineToDeadServers(serverName); @@ -591,7 +593,7 @@ public class ServerManager { if (this.onlineServers.isEmpty()) { master.stop("Cluster shutdown set; onlineServer=0"); } - return; + return false; } LOG.info("Processing expiration of " + serverName + " on " + this.master.getServerName()); master.getAssignmentManager().submitServerCrash(serverName, true); @@ -602,6 +604,7 @@ public class ServerManager { listener.serverRemoved(serverName); } } +return true; } @VisibleForTesting http://git-w