Author: jgray
Date: Wed Dec 22 00:54:01 2010
New Revision: 1051717

URL: http://svn.apache.org/viewvc?rev=1051717&view=rev
Log:
HBASE-3380 Master failover can split logs of live servers

Modified:
    hbase/branches/0.90/CHANGES.txt
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
    hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1051717&r1=1051716&r2=1051717&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Wed Dec 22 00:54:01 2010
@@ -765,6 +765,7 @@ Release 0.90.0 - Unreleased
    HBASE-3374  Our jruby jar has *GPL jars in it; fix
    HBASE-3343  Server not shutting down after losing log lease
    HBASE-3381  Interrupt of a region open comes across as a successful open
+   HBASE-3380  Master failover can split logs of live servers
 
 
   IMPROVEMENTS

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: 
http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1051717&r1=1051716&r2=1051717&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Wed Dec 22 00:54:01 2010
@@ -617,14 +617,31 @@ public class ServerManager {
   public int waitForRegionServers()
   throws InterruptedException {
     long interval = this.master.getConfiguration().
-      getLong("hbase.master.wait.on.regionservers.interval", 3000);
+      getLong("hbase.master.wait.on.regionservers.interval", 1500);
+    long timeout = this.master.getConfiguration().
+      getLong("hbase.master.wait.on.regionservers.timeout", 4500);
+    int minToStart = this.master.getConfiguration().
+      getInt("hbase.master.wait.on.regionservers.mintostart", 1);
+    int maxToStart = this.master.getConfiguration().
+      getInt("hbase.master.wait.on.regionservers.maxtostart", Integer.MAX_VALUE);
     // So, number of regionservers > 0 and its been n since last check in, break,
     // else just stall here
     int count = 0;
+    long slept = 0;
     for (int oldcount = countOfRegionServers(); !this.master.isStopped();) {
       Thread.sleep(interval);
+      slept += interval;
       count = countOfRegionServers();
-      if (count == oldcount && count > 0) break;
+      if (count == oldcount && count >= minToStart && slept >= timeout) {
+        LOG.info("Finished waiting for regionserver count to settle; " +
+            "count=" + count + ", sleptFor=" + slept);
+        break;
+      }
+      if (count >= maxToStart) {
+        LOG.info("At least the max configured number of regionserver(s) have " +
+            "checked in: " + count);
+        break;
+      }
       if (count == 0) {
         LOG.info("Waiting on regionserver(s) to checkin");
       } else {

Modified: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL: 
http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1051717&r1=1051716&r2=1051717&view=diff
==============================================================================
--- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (original)
+++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Wed Dec 22 00:54:01 2010
@@ -69,8 +69,13 @@ public class TestMasterFailover {
     final int NUM_MASTERS = 3;
     final int NUM_RS = 3;
 
+    // Create config to use for this cluster
+    Configuration conf = HBaseConfiguration.create();
+    conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
+    conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
+
     // Start the cluster
-    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
 
@@ -221,6 +226,8 @@ public class TestMasterFailover {
     // Need to drop the timeout much lower
     conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
     conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
+    conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
+    conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
 
     // Start the cluster
     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
@@ -525,6 +532,8 @@ public class TestMasterFailover {
     // Need to drop the timeout much lower
     conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
     conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
+    conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1);
+    conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 2);
 
     // Create and start the cluster
     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);


Reply via email to