[09/50] [abbrv] hbase git commit: HBASE-20137 TestRSGroups is flakey

2018-03-07 Thread zhangduo
HBASE-20137 TestRSGroups is flakey

On failed RPC we expire the server and suspend expecting the
resultant ServerCrashProcedure to wake us back up again. In tests,
TestRSGroup hung because it failed to schedule a server expiration
because the server was already expired undergoing processing (the
test was shutting down). Deal with this case by having expireServer
return false if unable to expire. Callers will then know
whether a ServerCrashProcedure has been scheduled or not.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
  Have expireServer return true if successful.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
 The log that included an exception whose message was the current
procedure as a String totally baffled me. Make it more obvious what
the exception is.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
 If expiring a server fails, wake our procedure -- do not suspend --
and presume it is ok to move the region to CLOSED state (because the server is
going down or concurrent crashed-server processing is ongoing).


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6d1740d4
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6d1740d4
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6d1740d4

Branch: refs/heads/HBASE-19397-branch-2
Commit: 6d1740d498d3f0f301a87a0a0cd598827790efa5
Parents: b59c39d
Author: Michael Stack 
Authored: Mon Mar 5 21:20:23 2018 -0800
Committer: Michael Stack 
Committed: Mon Mar 5 22:53:27 2018 -0800

--
 .../hadoop/hbase/master/ServerManager.java  | 17 ---
 .../FailedRemoteDispatchException.java  |  9 +++-
 .../assignment/RegionTransitionProcedure.java   | 15 ---
 .../master/assignment/UnassignProcedure.java| 47 
 .../assignment/TestAssignmentManager.java   | 10 -
 5 files changed, 64 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/6d1740d4/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
--
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 06d6c8b..e2f0b6b 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -555,15 +555,17 @@ public class ServerManager {
   }
 
   /*
-   * Expire the passed server.  Add it to list of dead servers and queue a
-   * shutdown processing.
+   * Expire the passed server. Add it to list of dead servers and queue a 
shutdown processing.
+   * @return True if we expired passed serverName else false if 
we failed to schedule
+   * an expire (and attendant ServerCrashProcedure -- some clients are 
dependent on
+   * server crash procedure being queued and need to know if has not been 
queued).
*/
-  public synchronized void expireServer(final ServerName serverName) {
+  public synchronized boolean expireServer(final ServerName serverName) {
 if (serverName.equals(master.getServerName())) {
   if (!(master.isAborted() || master.isStopped())) {
 master.stop("We lost our znode?");
   }
-  return;
+  return false;
 }
 if (!master.isServerCrashProcessingEnabled()) {
   LOG.info("Master doesn't enable ServerShutdownHandler during 
initialization, "
@@ -573,13 +575,13 @@ public class ServerManager {
   // the SCP is not enable yet and Meta's RIT may be suspend forever. See 
HBase-19287
   master.getAssignmentManager().handleMetaRITOnCrashedServer(serverName);
   this.queuedDeadServers.add(serverName);
-  return;
+  return false;
 }
 if (this.deadservers.isDeadServer(serverName)) {
   // TODO: Can this happen?  It shouldn't be online in this case?
   LOG.warn("Expiration of " + serverName +
   " but server shutdown already in progress");
-  return;
+  return false;
 }
 moveFromOnlineToDeadServers(serverName);
 
@@ -591,7 +593,7 @@ public class ServerManager {
   if (this.onlineServers.isEmpty()) {
 master.stop("Cluster shutdown set; onlineServer=0");
   }
-  return;
+  return false;
 }
 LOG.info("Processing expiration of " + serverName + " on " + 
this.master.getServerName());
 master.getAssignmentManager().submitServerCrash(serverName, true);
@@ -602,6 +604,7 @@ public class ServerManager {
 listener.serverRemoved(serverName);
   }
 }
+return true;
   }
 
   @VisibleForTesting

http://git-wip-us.apache.org/repos/asf/hbase/blob/6d1740d4/hbase-server/src/mai

[18/33] hbase git commit: HBASE-20137 TestRSGroups is flakey

2018-03-07 Thread zhangduo
HBASE-20137 TestRSGroups is flakey

On failed RPC we expire the server and suspend expecting the
resultant ServerCrashProcedure to wake us back up again. In tests,
TestRSGroup hung because it failed to schedule a server expiration
because the server was already expired undergoing processing (the
test was shutting down). Deal with this case by having expireServer
return false if unable to expire. Callers will then know
whether a ServerCrashProcedure has been scheduled or not.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
  Have expireServer return true if successful.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
 The log that included an exception whose message was the current
procedure as a String totally baffled me. Make it more obvious what
the exception is.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
 If expiring a server fails, wake our procedure -- do not suspend --
and presume it is ok to move the region to CLOSED state (because the server is
going down or concurrent crashed-server processing is ongoing).


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1f5e93a8
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1f5e93a8
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1f5e93a8

Branch: refs/heads/HBASE-19064
Commit: 1f5e93a8f85c702f65c27c6162cd10c2035e481d
Parents: 7889df3
Author: Michael Stack 
Authored: Mon Mar 5 21:20:23 2018 -0800
Committer: Michael Stack 
Committed: Tue Mar 6 10:55:40 2018 -0800

--
 .../hadoop/hbase/master/ServerManager.java  | 17 ---
 .../FailedRemoteDispatchException.java  |  9 +++-
 .../assignment/RegionTransitionProcedure.java   | 15 ---
 .../master/assignment/UnassignProcedure.java| 47 
 .../assignment/TestAssignmentManager.java   | 10 -
 5 files changed, 64 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/1f5e93a8/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
--
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 06d6c8b..e2f0b6b 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -555,15 +555,17 @@ public class ServerManager {
   }
 
   /*
-   * Expire the passed server.  Add it to list of dead servers and queue a
-   * shutdown processing.
+   * Expire the passed server. Add it to list of dead servers and queue a 
shutdown processing.
+   * @return True if we expired passed serverName else false if 
we failed to schedule
+   * an expire (and attendant ServerCrashProcedure -- some clients are 
dependent on
+   * server crash procedure being queued and need to know if has not been 
queued).
*/
-  public synchronized void expireServer(final ServerName serverName) {
+  public synchronized boolean expireServer(final ServerName serverName) {
 if (serverName.equals(master.getServerName())) {
   if (!(master.isAborted() || master.isStopped())) {
 master.stop("We lost our znode?");
   }
-  return;
+  return false;
 }
 if (!master.isServerCrashProcessingEnabled()) {
   LOG.info("Master doesn't enable ServerShutdownHandler during 
initialization, "
@@ -573,13 +575,13 @@ public class ServerManager {
   // the SCP is not enable yet and Meta's RIT may be suspend forever. See 
HBase-19287
   master.getAssignmentManager().handleMetaRITOnCrashedServer(serverName);
   this.queuedDeadServers.add(serverName);
-  return;
+  return false;
 }
 if (this.deadservers.isDeadServer(serverName)) {
   // TODO: Can this happen?  It shouldn't be online in this case?
   LOG.warn("Expiration of " + serverName +
   " but server shutdown already in progress");
-  return;
+  return false;
 }
 moveFromOnlineToDeadServers(serverName);
 
@@ -591,7 +593,7 @@ public class ServerManager {
   if (this.onlineServers.isEmpty()) {
 master.stop("Cluster shutdown set; onlineServer=0");
   }
-  return;
+  return false;
 }
 LOG.info("Processing expiration of " + serverName + " on " + 
this.master.getServerName());
 master.getAssignmentManager().submitServerCrash(serverName, true);
@@ -602,6 +604,7 @@ public class ServerManager {
 listener.serverRemoved(serverName);
   }
 }
+return true;
   }
 
   @VisibleForTesting

http://git-wip-us.apache.org/repos/asf/hbase/blob/1f5e93a8/hbase-server/src/main/java/or

hbase git commit: HBASE-20137 TestRSGroups is flakey

2018-03-06 Thread stack
Repository: hbase
Updated Branches:
  refs/heads/branch-2 b59c39d94 -> 6d1740d49


HBASE-20137 TestRSGroups is flakey

On failed RPC we expire the server and suspend expecting the
resultant ServerCrashProcedure to wake us back up again. In tests,
TestRSGroup hung because it failed to schedule a server expiration
because the server was already expired undergoing processing (the
test was shutting down). Deal with this case by having expireServer
return false if unable to expire. Callers will then know
whether a ServerCrashProcedure has been scheduled or not.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
  Have expireServer return true if successful.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
 The log that included an exception whose message was the current
procedure as a String totally baffled me. Make it more obvious what
the exception is.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
 If expiring a server fails, wake our procedure -- do not suspend --
and presume it is ok to move the region to CLOSED state (because the server is
going down or concurrent crashed-server processing is ongoing).


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6d1740d4
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6d1740d4
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6d1740d4

Branch: refs/heads/branch-2
Commit: 6d1740d498d3f0f301a87a0a0cd598827790efa5
Parents: b59c39d
Author: Michael Stack 
Authored: Mon Mar 5 21:20:23 2018 -0800
Committer: Michael Stack 
Committed: Mon Mar 5 22:53:27 2018 -0800

--
 .../hadoop/hbase/master/ServerManager.java  | 17 ---
 .../FailedRemoteDispatchException.java  |  9 +++-
 .../assignment/RegionTransitionProcedure.java   | 15 ---
 .../master/assignment/UnassignProcedure.java| 47 
 .../assignment/TestAssignmentManager.java   | 10 -
 5 files changed, 64 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/6d1740d4/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
--
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 06d6c8b..e2f0b6b 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -555,15 +555,17 @@ public class ServerManager {
   }
 
   /*
-   * Expire the passed server.  Add it to list of dead servers and queue a
-   * shutdown processing.
+   * Expire the passed server. Add it to list of dead servers and queue a 
shutdown processing.
+   * @return True if we expired passed serverName else false if 
we failed to schedule
+   * an expire (and attendant ServerCrashProcedure -- some clients are 
dependent on
+   * server crash procedure being queued and need to know if has not been 
queued).
*/
-  public synchronized void expireServer(final ServerName serverName) {
+  public synchronized boolean expireServer(final ServerName serverName) {
 if (serverName.equals(master.getServerName())) {
   if (!(master.isAborted() || master.isStopped())) {
 master.stop("We lost our znode?");
   }
-  return;
+  return false;
 }
 if (!master.isServerCrashProcessingEnabled()) {
   LOG.info("Master doesn't enable ServerShutdownHandler during 
initialization, "
@@ -573,13 +575,13 @@ public class ServerManager {
   // the SCP is not enable yet and Meta's RIT may be suspend forever. See 
HBase-19287
   master.getAssignmentManager().handleMetaRITOnCrashedServer(serverName);
   this.queuedDeadServers.add(serverName);
-  return;
+  return false;
 }
 if (this.deadservers.isDeadServer(serverName)) {
   // TODO: Can this happen?  It shouldn't be online in this case?
   LOG.warn("Expiration of " + serverName +
   " but server shutdown already in progress");
-  return;
+  return false;
 }
 moveFromOnlineToDeadServers(serverName);
 
@@ -591,7 +593,7 @@ public class ServerManager {
   if (this.onlineServers.isEmpty()) {
 master.stop("Cluster shutdown set; onlineServer=0");
   }
-  return;
+  return false;
 }
 LOG.info("Processing expiration of " + serverName + " on " + 
this.master.getServerName());
 master.getAssignmentManager().submitServerCrash(serverName, true);
@@ -602,6 +604,7 @@ public class ServerManager {
 listener.serverRemoved(serverName);
   }
 }
+return true;
   }
 
   @VisibleForTesting

http://g

hbase git commit: HBASE-20137 TestRSGroups is flakey

2018-03-06 Thread stack
Repository: hbase
Updated Branches:
  refs/heads/master 7889df371 -> 1f5e93a8f


HBASE-20137 TestRSGroups is flakey

On failed RPC we expire the server and suspend expecting the
resultant ServerCrashProcedure to wake us back up again. In tests,
TestRSGroup hung because it failed to schedule a server expiration
because the server was already expired undergoing processing (the
test was shutting down). Deal with this case by having expireServer
return false if unable to expire. Callers will then know
whether a ServerCrashProcedure has been scheduled or not.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
  Have expireServer return true if successful.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
 The log that included an exception whose message was the current
procedure as a String totally baffled me. Make it more obvious what
the exception is.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
 If expiring a server fails, wake our procedure -- do not suspend --
and presume it is ok to move the region to CLOSED state (because the server is
going down or concurrent crashed-server processing is ongoing).


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1f5e93a8
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1f5e93a8
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1f5e93a8

Branch: refs/heads/master
Commit: 1f5e93a8f85c702f65c27c6162cd10c2035e481d
Parents: 7889df3
Author: Michael Stack 
Authored: Mon Mar 5 21:20:23 2018 -0800
Committer: Michael Stack 
Committed: Tue Mar 6 10:55:40 2018 -0800

--
 .../hadoop/hbase/master/ServerManager.java  | 17 ---
 .../FailedRemoteDispatchException.java  |  9 +++-
 .../assignment/RegionTransitionProcedure.java   | 15 ---
 .../master/assignment/UnassignProcedure.java| 47 
 .../assignment/TestAssignmentManager.java   | 10 -
 5 files changed, 64 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hbase/blob/1f5e93a8/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
--
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 06d6c8b..e2f0b6b 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -555,15 +555,17 @@ public class ServerManager {
   }
 
   /*
-   * Expire the passed server.  Add it to list of dead servers and queue a
-   * shutdown processing.
+   * Expire the passed server. Add it to list of dead servers and queue a 
shutdown processing.
+   * @return True if we expired passed serverName else false if 
we failed to schedule
+   * an expire (and attendant ServerCrashProcedure -- some clients are 
dependent on
+   * server crash procedure being queued and need to know if has not been 
queued).
*/
-  public synchronized void expireServer(final ServerName serverName) {
+  public synchronized boolean expireServer(final ServerName serverName) {
 if (serverName.equals(master.getServerName())) {
   if (!(master.isAborted() || master.isStopped())) {
 master.stop("We lost our znode?");
   }
-  return;
+  return false;
 }
 if (!master.isServerCrashProcessingEnabled()) {
   LOG.info("Master doesn't enable ServerShutdownHandler during 
initialization, "
@@ -573,13 +575,13 @@ public class ServerManager {
   // the SCP is not enable yet and Meta's RIT may be suspend forever. See 
HBase-19287
   master.getAssignmentManager().handleMetaRITOnCrashedServer(serverName);
   this.queuedDeadServers.add(serverName);
-  return;
+  return false;
 }
 if (this.deadservers.isDeadServer(serverName)) {
   // TODO: Can this happen?  It shouldn't be online in this case?
   LOG.warn("Expiration of " + serverName +
   " but server shutdown already in progress");
-  return;
+  return false;
 }
 moveFromOnlineToDeadServers(serverName);
 
@@ -591,7 +593,7 @@ public class ServerManager {
   if (this.onlineServers.isEmpty()) {
 master.stop("Cluster shutdown set; onlineServer=0");
   }
-  return;
+  return false;
 }
 LOG.info("Processing expiration of " + serverName + " on " + 
this.master.getServerName());
 master.getAssignmentManager().submitServerCrash(serverName, true);
@@ -602,6 +604,7 @@ public class ServerManager {
 listener.serverRemoved(serverName);
   }
 }
+return true;
   }
 
   @VisibleForTesting

http://git-w