This is an automated email from the ASF dual-hosted git repository.
apurtell pushed a commit to branch branch-1
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-1 by this push:
new 47e42da Revert "HBASE-21164 reportForDuty should do backoff rather
than retry"
47e42da is described below
commit 47e42dab75d75208584e8cabbd90568467833598
Author: Andrew Purtell <[email protected]>
AuthorDate: Wed Feb 20 09:25:46 2019 -0800
Revert "HBASE-21164 reportForDuty should do backoff rather than retry"
This reverts commit f3f3798575527df45961f4cbdb6d4c1d04cfb1e3.
This change causes TestMasterShutdown to reproducibly fail in some
environments.
---
.../org/apache/hadoop/hbase/util/RetryCounter.java | 10 ---
.../java/org/apache/hadoop/hbase/util/Sleeper.java | 31 +++++---
.../org/apache/hadoop/hbase/master/HMaster.java | 13 +---
.../hadoop/hbase/regionserver/HRegionServer.java | 15 +---
.../TestRegionServerReportForDuty.java | 89 ----------------------
5 files changed, 26 insertions(+), 132 deletions(-)
diff --git
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounter.java
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounter.java
index 881dd36..73512fa 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounter.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RetryCounter.java
@@ -174,14 +174,4 @@ public class RetryCounter {
public int getAttemptTimes() {
return attempts;
}
-
- public long getBackoffTime() {
- return this.retryConfig.backoffPolicy.getBackoffTime(this.retryConfig,
getAttemptTimes());
- }
-
- public long getBackoffTimeAndIncrementAttempts() {
- long backoffTime = getBackoffTime();
- useRetry();
- return backoffTime;
- }
}
diff --git
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java
index a07ee9c..a60c571 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java
@@ -50,6 +50,13 @@ public class Sleeper {
}
/**
+ * Sleep for period.
+ */
+ public void sleep() {
+ sleep(System.currentTimeMillis());
+ }
+
+ /**
* If currently asleep, stops sleeping; if not asleep, will skip the next
* sleep cycle.
*/
@@ -61,24 +68,28 @@ public class Sleeper {
}
/**
- * Sleep for period.
+ * Sleep for period adjusted by passed <code>startTime</code>
+ * @param startTime Time some task started previous to now. Time to sleep
+ * will be docked current time minus passed <code>startTime</code>.
*/
- public void sleep() {
- sleep(this.period);
- }
-
- public void sleep(long sleepTime) {
+ public void sleep(final long startTime) {
if (this.stopper.isStopped()) {
return;
}
long now = System.currentTimeMillis();
- long currentSleepTime = sleepTime;
- while (currentSleepTime > 0) {
+ long waitTime = this.period - (now - startTime);
+ if (waitTime > this.period) {
+ LOG.warn("Calculated wait time > " + this.period +
+ "; setting to this.period: " + System.currentTimeMillis() + ", " +
+ startTime);
+ waitTime = this.period;
+ }
+ while (waitTime > 0) {
long woke = -1;
try {
synchronized (sleepLock) {
if (triggerWake) break;
- sleepLock.wait(currentSleepTime);
+ sleepLock.wait(waitTime);
}
woke = System.currentTimeMillis();
long slept = woke - now;
@@ -97,7 +108,7 @@ public class Sleeper {
}
// Recalculate waitTime.
woke = (woke == -1)? System.currentTimeMillis(): woke;
- currentSleepTime = this.period - (woke - now);
+ waitTime = this.period - (woke - startTime);
}
synchronized(sleepLock) {
triggerWake = false;
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index c691402..d8a56bd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -2649,18 +2649,7 @@ public class HMaster extends HRegionServer implements
MasterServices, Server {
stop("Stopped by " + Thread.currentThread().getName());
}
- @Override
- public void stop(String msg) {
- if (!isStopped()) {
- super.stop(msg);
- if (this.activeMasterManager != null) {
- this.activeMasterManager.stop();
- }
- }
- }
-
- @VisibleForTesting
- protected void checkServiceStarted() throws ServerNotRunningYetException {
+ void checkServiceStarted() throws ServerNotRunningYetException {
if (!serviceStarted) {
throw new ServerNotRunningYetException("Server is not running yet");
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 2751e72..bc2075d 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -182,8 +182,6 @@ import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.util.JSONBean;
import org.apache.hadoop.hbase.util.JvmPauseMonitor;
import org.apache.hadoop.hbase.util.MBeanUtil;
-import org.apache.hadoop.hbase.util.RetryCounter;
-import org.apache.hadoop.hbase.util.RetryCounterFactory;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.apache.hadoop.hbase.util.Sleeper;
import org.apache.hadoop.hbase.util.Threads;
@@ -974,18 +972,13 @@ public class HRegionServer extends HasThread implements
this.rsHost = new RegionServerCoprocessorHost(this, this.conf);
}
- // Try and register with the Master; tell it we are here. Break if
server is stopped or the
- // clusterup flag is down or hdfs went wacky. Once registered
successfully, go ahead and start
- // up all Services. Use RetryCounter to get backoff in case Master is
struggling to come up.
- RetryCounterFactory rcf = new RetryCounterFactory(Integer.MAX_VALUE,
- this.sleeper.getPeriod(), 1000 * 60 * 5);
- RetryCounter rc = rcf.create();
+ // Try and register with the Master; tell it we are here. Break if
+ // server is stopped or the clusterup flag is down or hdfs went wacky.
while (keepLooping()) {
RegionServerStartupResponse w = reportForDuty();
if (w == null) {
- long sleepTime = rc.getBackoffTimeAndIncrementAttempts();
- LOG.warn("reportForDuty failed; sleeping " + sleepTime + " ms and
then retrying");
- this.sleeper.sleep(sleepTime);
+ LOG.warn("reportForDuty failed; sleeping and then retrying.");
+ this.sleeper.sleep();
} else {
handleReportForDutyResponse(w);
break;
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
index 792843c..2f60bef 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
@@ -21,9 +21,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
-import java.io.StringWriter;
-import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -32,19 +30,12 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.LocalHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
-import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
-import org.apache.log4j.Appender;
-import org.apache.log4j.Layout;
-import org.apache.log4j.PatternLayout;
-import org.apache.log4j.WriterAppender;
import org.apache.zookeeper.KeeperException;
-
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -82,86 +73,6 @@ public class TestRegionServerReportForDuty {
}
/**
- * LogCapturer is similar to {@link
org.apache.hadoop.test.GenericTestUtils.LogCapturer}
- * except that this implementation has a default appender to the root logger.
- * Hadoop 2.8+ supports the default appender in the LogCapture it ships and
this can be replaced.
- * TODO: This class can be removed after we upgrade Hadoop dependency.
- */
- static class LogCapturer {
- private StringWriter sw = new StringWriter();
- private WriterAppender appender;
- private org.apache.log4j.Logger logger;
-
- LogCapturer(org.apache.log4j.Logger logger) {
- this.logger = logger;
- Appender defaultAppender =
org.apache.log4j.Logger.getRootLogger().getAppender("stdout");
- if (defaultAppender == null) {
- defaultAppender =
org.apache.log4j.Logger.getRootLogger().getAppender("console");
- }
- final Layout layout = (defaultAppender == null) ? new PatternLayout() :
- defaultAppender.getLayout();
- this.appender = new WriterAppender(layout, sw);
- this.logger.addAppender(this.appender);
- }
-
- String getOutput() {
- return sw.toString();
- }
-
- public void stopCapturing() {
- this.logger.removeAppender(this.appender);
- }
- }
-
- /**
- * This test HMaster class will always throw ServerNotRunningYetException if
checked.
- */
- public static class NeverInitializedMaster extends HMaster {
- public NeverInitializedMaster(Configuration conf, CoordinatedStateManager
csm)
- throws IOException, KeeperException, InterruptedException {
- super(conf, csm);
- }
-
- @Override
- protected void checkServiceStarted() throws ServerNotRunningYetException {
- throw new ServerNotRunningYetException("Server is not running yet");
- }
- }
-
- /**
- * Tests region server should backoff to report for duty if master is not
ready.
- */
- @Test
- public void testReportForDutyBackoff() throws IOException,
InterruptedException {
- cluster.getConfiguration().set(HConstants.MASTER_IMPL,
NeverInitializedMaster.class.getName());
- master = cluster.addMaster();
- master.start();
-
- LogCapturer capturer = new
LogCapturer(org.apache.log4j.Logger.getLogger(HRegionServer.class));
- // Set sleep interval relatively low so that exponential backoff is more
demanding.
- int msginterval = 100;
- cluster.getConfiguration().setInt("hbase.regionserver.msginterval",
msginterval);
- rs = cluster.addRegionServer();
- rs.start();
-
- int interval = 10_000;
- Thread.sleep(interval);
- capturer.stopCapturing();
- String output = capturer.getOutput();
- LOG.info(output);
- String failMsg = "reportForDuty failed;";
- int count = StringUtils.countMatches(output, failMsg);
-
- // Following asserts the actual retry number is in range (expectedRetry/2,
expectedRetry*2).
- // Ideally we can assert the exact retry count. We relax here to tolerate
contention error.
- int expectedRetry = (int)Math.ceil(Math.log(interval - msginterval));
- assertTrue(String.format("reportForDuty retries %d times, less than
expected min %d",
- count, expectedRetry / 2), count > expectedRetry / 2);
- assertTrue(String.format("reportForDuty retries %d times, more than
expected max %d",
- count, expectedRetry * 2), count < expectedRetry * 2);
- }
-
- /**
* Tests region sever reportForDuty with backup master becomes primary
master after
* the first master goes away.
*/