Author: stack
Date: Tue Dec 20 06:45:50 2011
New Revision: 1221120
URL: http://svn.apache.org/viewvc?rev=1221120&view=rev
Log:
HBASE-5063 RegionServers fail to report to backup HMaster after primary goes down
Modified:
hbase/branches/0.92/CHANGES.txt
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
Modified: hbase/branches/0.92/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1221120&r1=1221119&r2=1221120&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Tue Dec 20 06:45:50 2011
@@ -493,6 +493,7 @@ Release 0.92.0 - Unreleased
HBASE-5040 Secure HBase builds fail
HBASE-5062 Missing logons if security is enabled
HBASE-4935 hbase 0.92.0 doesn't work going against 0.20.205.0, its
packaged hadoop
+ HBASE-5063 RegionServers fail to report to backup HMaster after primary goes down
TESTS
HBASE-4492 TestRollingRestart fails intermittently
Modified:
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1221120&r1=1221119&r2=1221120&view=diff
==============================================================================
---
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
(original)
+++
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Tue Dec 20 06:45:50 2011
@@ -1603,15 +1603,23 @@ public class HRegionServer implements HR
*/
private ServerName getMaster() {
ServerName masterServerName = null;
- while ((masterServerName = this.masterAddressManager.getMasterAddress()) == null) {
- if (!keepLooping()) return null;
- LOG.debug("No master found; retry");
- sleeper.sleep();
- }
- InetSocketAddress isa =
- new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort());
HMasterRegionInterface master = null;
while (keepLooping() && master == null) {
+ masterServerName = this.masterAddressManager.getMasterAddress();
+ if (masterServerName == null) {
+ if (!keepLooping()) {
+ // give up with no connection.
+ LOG.debug("No master found and cluster is stopped; bailing out");
+ return null;
+ }
+ LOG.debug("No master found; retry");
+ sleeper.sleep();
+ continue;
+ }
+
+ InetSocketAddress isa =
+ new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort());
+
LOG.info("Attempting connect to Master server at " +
this.masterAddressManager.getMasterAddress());
try {
Modified:
hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL:
http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1221120&r1=1221119&r2=1221120&view=diff
==============================================================================
---
hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
(original)
+++
hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
Tue Dec 20 06:45:50 2011
@@ -123,6 +123,9 @@ public class TestMasterFailover {
}
assertEquals(1, numActive);
assertEquals(2, masterThreads.size());
+ int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
+ LOG.info("Active master managing " + rsCount + " regions servers");
+ assertEquals(3, rsCount);
// kill the active master
LOG.debug("\n\nStopping the active master\n");
@@ -135,8 +138,13 @@ public class TestMasterFailover {
LOG.debug("\n\nVerifying backup master is now active\n");
// should only have one master now
assertEquals(1, masterThreads.size());
+
// and he should be active
- assertTrue(masterThreads.get(0).getMaster().isActiveMaster());
+ HMaster active = masterThreads.get(0).getMaster();
+ int rss = active.getClusterStatus().getServersSize();
+ LOG.info("Active master managing " + rss + " regions servers");
+ assertTrue(active.isActiveMaster());
+ assertEquals(3, rss);
// Stop the cluster
TEST_UTIL.shutdownMiniCluster();