This is an automated email from the ASF dual-hosted git repository.

apurtell pushed a commit to branch branch-1
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-1 by this push:
     new 47e42da  Revert "HBASE-21164 reportForDuty should do backoff rather 
than retry"
47e42da is described below

commit 47e42dab75d75208584e8cabbd90568467833598
Author: Andrew Purtell <[email protected]>
AuthorDate: Wed Feb 20 09:25:46 2019 -0800

    Revert "HBASE-21164 reportForDuty should do backoff rather than retry"
    
    This reverts commit f3f3798575527df45961f4cbdb6d4c1d04cfb1e3.
    
    This change causes TestMasterShutdown to reproducibly fail in some 
environments.
---
 .../org/apache/hadoop/hbase/util/RetryCounter.java | 10 ---
 .../java/org/apache/hadoop/hbase/util/Sleeper.java | 31 +++++---
 .../org/apache/hadoop/hbase/master/HMaster.java    | 13 +---
 .../hadoop/hbase/regionserver/HRegionServer.java   | 15 +---
 .../TestRegionServerReportForDuty.java             | 89 ----------------------
 5 files changed, 26 insertions(+), 132 deletions(-)

diff --git 
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounter.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounter.java
index 881dd36..73512fa 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounter.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounter.java
@@ -174,14 +174,4 @@ public class RetryCounter {
   public int getAttemptTimes() {
     return attempts;
   }
-
-  public long getBackoffTime() {
-    return this.retryConfig.backoffPolicy.getBackoffTime(this.retryConfig, 
getAttemptTimes());
-  }
-
-  public long getBackoffTimeAndIncrementAttempts() {
-    long backoffTime = getBackoffTime();
-    useRetry();
-    return backoffTime;
-  }
 }
diff --git 
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java
index a07ee9c..a60c571 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java
@@ -50,6 +50,13 @@ public class Sleeper {
   }
 
   /**
+   * Sleep for period.
+   */
+  public void sleep() {
+    sleep(System.currentTimeMillis());
+  }
+
+  /**
    * If currently asleep, stops sleeping; if not asleep, will skip the next
    * sleep cycle.
    */
@@ -61,24 +68,28 @@ public class Sleeper {
   }
 
   /**
-   * Sleep for period.
+   * Sleep for period adjusted by passed <code>startTime</code>
+   * @param startTime Time some task started previous to now.  Time to sleep
+   * will be docked current time minus passed <code>startTime</code>.
    */
-  public void sleep() {
-    sleep(this.period);
-  }
-
-  public void sleep(long sleepTime) {
+  public void sleep(final long startTime) {
     if (this.stopper.isStopped()) {
       return;
     }
     long now = System.currentTimeMillis();
-    long currentSleepTime = sleepTime;
-    while (currentSleepTime > 0) {
+    long waitTime = this.period - (now - startTime);
+    if (waitTime > this.period) {
+      LOG.warn("Calculated wait time > " + this.period +
+        "; setting to this.period: " + System.currentTimeMillis() + ", " +
+        startTime);
+      waitTime = this.period;
+    }
+    while (waitTime > 0) {
       long woke = -1;
       try {
         synchronized (sleepLock) {
           if (triggerWake) break;
-          sleepLock.wait(currentSleepTime);
+          sleepLock.wait(waitTime);
         }
         woke = System.currentTimeMillis();
         long slept = woke - now;
@@ -97,7 +108,7 @@ public class Sleeper {
       }
       // Recalculate waitTime.
       woke = (woke == -1)? System.currentTimeMillis(): woke;
-      currentSleepTime = this.period - (woke - now);
+      waitTime = this.period - (woke - startTime);
     }
     synchronized(sleepLock) {
       triggerWake = false;
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index c691402..d8a56bd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -2649,18 +2649,7 @@ public class HMaster extends HRegionServer implements 
MasterServices, Server {
     stop("Stopped by " + Thread.currentThread().getName());
   }
 
-  @Override
-  public void stop(String msg) {
-    if (!isStopped()) {
-      super.stop(msg);
-      if (this.activeMasterManager != null) {
-        this.activeMasterManager.stop();
-      }
-    }
-  }
-
-  @VisibleForTesting
-  protected void checkServiceStarted() throws ServerNotRunningYetException {
+  void checkServiceStarted() throws ServerNotRunningYetException {
     if (!serviceStarted) {
       throw new ServerNotRunningYetException("Server is not running yet");
     }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 2751e72..bc2075d 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -182,8 +182,6 @@ import org.apache.hadoop.hbase.util.HasThread;
 import org.apache.hadoop.hbase.util.JSONBean;
 import org.apache.hadoop.hbase.util.JvmPauseMonitor;
 import org.apache.hadoop.hbase.util.MBeanUtil;
-import org.apache.hadoop.hbase.util.RetryCounter;
-import org.apache.hadoop.hbase.util.RetryCounterFactory;
 import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
 import org.apache.hadoop.hbase.util.Sleeper;
 import org.apache.hadoop.hbase.util.Threads;
@@ -974,18 +972,13 @@ public class HRegionServer extends HasThread implements
         this.rsHost = new RegionServerCoprocessorHost(this, this.conf);
       }
 
-      // Try and register with the Master; tell it we are here.  Break if 
server is stopped or the
-      // clusterup flag is down or hdfs went wacky. Once registered 
successfully, go ahead and start
-      // up all Services. Use RetryCounter to get backoff in case Master is 
struggling to come up.
-      RetryCounterFactory rcf = new RetryCounterFactory(Integer.MAX_VALUE,
-          this.sleeper.getPeriod(), 1000 * 60 * 5);
-      RetryCounter rc = rcf.create();
+      // Try and register with the Master; tell it we are here.  Break if
+      // server is stopped or the clusterup flag is down or hdfs went wacky.
       while (keepLooping()) {
         RegionServerStartupResponse w = reportForDuty();
         if (w == null) {
-          long sleepTime = rc.getBackoffTimeAndIncrementAttempts();
-          LOG.warn("reportForDuty failed; sleeping " + sleepTime + " ms and 
then retrying");
-          this.sleeper.sleep(sleepTime);
+          LOG.warn("reportForDuty failed; sleeping and then retrying.");
+          this.sleeper.sleep();
         } else {
           handleReportForDutyResponse(w);
           break;
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
index 792843c..2f60bef 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
@@ -21,9 +21,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
-import java.io.StringWriter;
 
-import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -32,19 +30,12 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.LocalHBaseCluster;
 import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
 import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
-import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.ServerManager;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
-import org.apache.log4j.Appender;
-import org.apache.log4j.Layout;
-import org.apache.log4j.PatternLayout;
-import org.apache.log4j.WriterAppender;
 import org.apache.zookeeper.KeeperException;
-
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -82,86 +73,6 @@ public class TestRegionServerReportForDuty {
   }
 
   /**
-   * LogCapturer is similar to {@link 
org.apache.hadoop.test.GenericTestUtils.LogCapturer}
-   * except that this implementation has a default appender to the root logger.
-   * Hadoop 2.8+ supports the default appender in the LogCapture it ships and 
this can be replaced.
-   * TODO: This class can be removed after we upgrade Hadoop dependency.
-   */
-  static class LogCapturer {
-    private StringWriter sw = new StringWriter();
-    private WriterAppender appender;
-    private org.apache.log4j.Logger logger;
-
-    LogCapturer(org.apache.log4j.Logger logger) {
-      this.logger = logger;
-      Appender defaultAppender = 
org.apache.log4j.Logger.getRootLogger().getAppender("stdout");
-      if (defaultAppender == null) {
-        defaultAppender = 
org.apache.log4j.Logger.getRootLogger().getAppender("console");
-      }
-      final Layout layout = (defaultAppender == null) ? new PatternLayout() :
-          defaultAppender.getLayout();
-      this.appender = new WriterAppender(layout, sw);
-      this.logger.addAppender(this.appender);
-    }
-
-    String getOutput() {
-      return sw.toString();
-    }
-
-    public void stopCapturing() {
-      this.logger.removeAppender(this.appender);
-    }
-  }
-
-  /**
-   * This test HMaster class will always throw ServerNotRunningYetException if 
checked.
-   */
-  public static class NeverInitializedMaster extends HMaster {
-    public NeverInitializedMaster(Configuration conf, CoordinatedStateManager 
csm)
-        throws IOException, KeeperException, InterruptedException {
-      super(conf, csm);
-    }
-
-    @Override
-    protected void checkServiceStarted() throws ServerNotRunningYetException {
-      throw new ServerNotRunningYetException("Server is not running yet");
-    }
-  }
-
-  /**
-   * Tests region server should backoff to report for duty if master is not 
ready.
-   */
-  @Test
-  public void testReportForDutyBackoff() throws IOException, 
InterruptedException {
-    cluster.getConfiguration().set(HConstants.MASTER_IMPL, 
NeverInitializedMaster.class.getName());
-    master = cluster.addMaster();
-    master.start();
-
-    LogCapturer capturer = new 
LogCapturer(org.apache.log4j.Logger.getLogger(HRegionServer.class));
-    // Set sleep interval relatively low so that exponential backoff is more 
demanding.
-    int msginterval = 100;
-    cluster.getConfiguration().setInt("hbase.regionserver.msginterval", 
msginterval);
-    rs = cluster.addRegionServer();
-    rs.start();
-
-    int interval = 10_000;
-    Thread.sleep(interval);
-    capturer.stopCapturing();
-    String output = capturer.getOutput();
-    LOG.info(output);
-    String failMsg = "reportForDuty failed;";
-    int count = StringUtils.countMatches(output, failMsg);
-
-    // Following asserts the actual retry number is in range (expectedRetry/2, 
expectedRetry*2).
-    // Ideally we can assert the exact retry count. We relax here to tolerate 
contention error.
-    int expectedRetry = (int)Math.ceil(Math.log(interval - msginterval));
-    assertTrue(String.format("reportForDuty retries %d times, less than 
expected min %d",
-        count, expectedRetry / 2), count > expectedRetry / 2);
-    assertTrue(String.format("reportForDuty retries %d times, more than 
expected max %d",
-        count, expectedRetry * 2), count < expectedRetry * 2);
-  }
-
-  /**
    * Tests region sever reportForDuty with backup master becomes primary 
master after
    * the first master goes away.
    */

Reply via email to