Author: stack
Date: Tue Dec 20 06:46:17 2011
New Revision: 1221121
URL: http://svn.apache.org/viewvc?rev=1221121&view=rev
Log:
HBASE-5063 RegionServers fail to report to backup HMaster after primary goes down
Modified:
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
Modified:
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1221121&r1=1221120&r2=1221121&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Dec 20 06:46:17 2011
@@ -1738,21 +1738,25 @@ public class HRegionServer implements HR
private ServerName getMaster() {
ServerName masterServerName = null;
long previousLogTime = 0;
- while ((masterServerName = this.masterAddressManager.getMasterAddress())
== null) {
- if (!keepLooping()) return null;
- if (System.currentTimeMillis() > (previousLogTime+1000)){
+ HMasterRegionInterface master = null;
+ while (keepLooping() && master == null) {
+ masterServerName = this.masterAddressManager.getMasterAddress();
+ if (masterServerName == null) {
+ if (!keepLooping()) {
+ // give up with no connection.
+ LOG.debug("No master found and cluster is stopped; bailing out");
+ return null;
+ }
LOG.debug("No master found; retry");
previousLogTime = System.currentTimeMillis();
+
+ sleeper.sleep();
+ continue;
}
- try {
- Thread.sleep(100);
- } catch (InterruptedException ignored) {
- }
- }
- InetSocketAddress isa =
- new InetSocketAddress(masterServerName.getHostname(),
masterServerName.getPort());
- HMasterRegionInterface master = null;
- while (keepLooping() && master == null) {
+
+ InetSocketAddress isa =
+ new InetSocketAddress(masterServerName.getHostname(),
masterServerName.getPort());
+
LOG.info("Attempting connect to Master server at " +
this.masterAddressManager.getMasterAddress());
try {
Modified:
hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1221121&r1=1221120&r2=1221121&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Tue Dec 20 06:46:17 2011
@@ -115,6 +115,9 @@ public class TestMasterFailover {
}
assertEquals(1, numActive);
assertEquals(2, masterThreads.size());
+ int rsCount =
masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
+ LOG.info("Active master managing " + rsCount + " regions servers");
+ assertEquals(3, rsCount);
// kill the active master
LOG.debug("\n\nStopping the active master\n");
@@ -127,8 +130,13 @@ public class TestMasterFailover {
LOG.debug("\n\nVerifying backup master is now active\n");
// should only have one master now
assertEquals(1, masterThreads.size());
+
// and he should be active
- assertTrue(masterThreads.get(0).getMaster().isActiveMaster());
+ HMaster active = masterThreads.get(0).getMaster();
+ int rss = active.getClusterStatus().getServersSize();
+ LOG.info("Active master managing " + rss + " regions servers");
+ assertTrue(active.isActiveMaster());
+ assertEquals(3, rss);
// Stop the cluster
TEST_UTIL.shutdownMiniCluster();