Repository: hbase
Updated Branches:
refs/heads/branch-1.0 bedc17db3 -> 740df826b
HBASE-12844 ServerManager.isServerReachable() should sleep between retries
Conflicts:
hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/740df826
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/740df826
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/740df826
Branch: refs/heads/branch-1.0
Commit: 740df826bda3e1f2cd89f39dbf61804b9b56e7d2
Parents: bedc17d
Author: Enis Soztutar <[email protected]>
Authored: Wed Jan 14 15:45:46 2015 -0800
Committer: Enis Soztutar <[email protected]>
Committed: Wed Jan 14 15:53:44 2015 -0800
----------------------------------------------------------------------
.../hadoop/hbase/master/ServerManager.java | 25 ++++++++++++++++----
.../master/TestAssignmentManagerOnCluster.java | 6 +++--
2 files changed, 24 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/740df826/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
----------------------------------------------------------------------
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 8be32fe..04c4202 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -63,6 +63,9 @@ import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Triple;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.RetryCounter;
+import org.apache.hadoop.hbase.util.RetryCounterFactory;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
@@ -141,6 +144,8 @@ public class ServerManager {
private final long maxSkew;
private final long warningSkew;
+ private final RetryCounterFactory pingRetryCounterFactory;
+
/**
* Set of region servers which are dead but not processed immediately. If one
* server died before master enables ServerShutdownHandler, the server will
be
@@ -199,6 +204,11 @@ public class ServerManager {
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
warningSkew = c.getLong("hbase.master.warningclockskew", 10000);
this.connection = connect ?
(ClusterConnection)ConnectionFactory.createConnection(c) : null;
+ int pingMaxAttempts = Math.max(1, master.getConfiguration().getInt(
+ "hbase.master.maximum.ping.server.attempts", 10));
+ int pingSleepInterval = Math.max(1, master.getConfiguration().getInt(
+ "hbase.master.ping.server.retry.sleep.interval", 100));
+ this.pingRetryCounterFactory = new RetryCounterFactory(pingMaxAttempts,
pingSleepInterval);
}
/**
@@ -801,9 +811,9 @@ public class ServerManager {
*/
public boolean isServerReachable(ServerName server) {
if (server == null) throw new NullPointerException("Passed server is
null");
- int maximumAttempts = Math.max(1, master.getConfiguration().getInt(
- "hbase.master.maximum.ping.server.attempts", 10));
- for (int i = 0; i < maximumAttempts; i++) {
+
+ RetryCounter retryCounter = pingRetryCounterFactory.create();
+ while (retryCounter.shouldRetry()) {
try {
AdminService.BlockingInterface admin = getRsAdmin(server);
if (admin != null) {
@@ -812,8 +822,13 @@ public class ServerManager {
&& server.getStartcode() == info.getServerName().getStartCode();
}
} catch (IOException ioe) {
- LOG.debug("Couldn't reach " + server + ", try=" + i
- + " of " + maximumAttempts, ioe);
+ LOG.debug("Couldn't reach " + server + ", try=" +
retryCounter.getAttemptTimes()
+ + " of " + retryCounter.getMaxAttempts(), ioe);
+ try {
+ retryCounter.sleepUntilNextRetry();
+ } catch(InterruptedException ie) {
+ Thread.currentThread().interrupt();
+ }
}
}
return false;
http://git-wip-us.apache.org/repos/asf/hbase/blob/740df826/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
----------------------------------------------------------------------
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
index bf44147..a4ae2fe 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
@@ -105,6 +105,8 @@ public class TestAssignmentManagerOnCluster {
conf.setInt("hbase.assignment.maximum.attempts", 3);
// Put meta on master to avoid meta server shutdown handling
conf.set("hbase.balancer.tablesOnMaster", "hbase:meta");
+ conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
+ conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class,
MyRegionServer.class);
admin = TEST_UTIL.getHBaseAdmin();
@@ -1219,7 +1221,7 @@ public class TestAssignmentManagerOnCluster {
TEST_UTIL.deleteTable(Bytes.toBytes(table));
}
}
-
+
/**
* Test concurrent updates to meta when meta is not on master
* @throws Exception
@@ -1279,7 +1281,7 @@ public class TestAssignmentManagerOnCluster {
assertTrue(count == 100);
rss.stop();
}
-
+
static class MyLoadBalancer extends StochasticLoadBalancer {
// For this region, if specified, always assign to nowhere
static volatile String controledRegion = null;