This is an automated email from the ASF dual-hosted git repository.
szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ratis.git
The following commit(s) were added to refs/heads/master by this push:
new c31cf5c RATIS-545. Leader Election timeout should consider JVM pause interval. Contributed by Lokesh Jain
c31cf5c is described below
commit c31cf5c49292be9665c4959cf5db14b068521e02
Author: Tsz Wo Nicholas Sze <[email protected]>
AuthorDate: Mon Jun 3 14:44:02 2019 +0800
RATIS-545. Leader Election timeout should consider JVM pause interval. Contributed by Lokesh Jain
---
.../src/main/java/org/apache/ratis/util/JavaUtils.java | 13 +++++++++++++
.../java/org/apache/ratis/server/RaftServerConfigKeys.java | 11 ++++++++++-
.../java/org/apache/ratis/server/impl/FollowerState.java | 7 ++++++-
.../java/org/apache/ratis/server/impl/RaftServerImpl.java | 6 ++++++
4 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/ratis-common/src/main/java/org/apache/ratis/util/JavaUtils.java b/ratis-common/src/main/java/org/apache/ratis/util/JavaUtils.java
index 4e4d6c0..d400414 100644
--- a/ratis-common/src/main/java/org/apache/ratis/util/JavaUtils.java
+++ b/ratis-common/src/main/java/org/apache/ratis/util/JavaUtils.java
@@ -241,4 +241,17 @@ public interface JavaUtils {
throw new CompletionException(t);
}
}
+
+ static boolean sleep(long sleepMs, long thresholdMs) throws
InterruptedException {
+ final Timestamp t = Timestamp.currentTime();
+ Thread.sleep(sleepMs);
+ final long elapsedMs = t.elapsedTimeMs();
+ if (elapsedMs - sleepMs > thresholdMs) {
+ LOG.warn("Unexpected long sleep: sleep({}ms) actually took {}ms which is
over the threshold {}ms",
+ sleepMs, elapsedMs, thresholdMs);
+ return false;
+ }
+ return true;
+ }
+
}
diff --git a/ratis-server/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java b/ratis-server/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java
index 0fdd622..c077041 100644
--- a/ratis-server/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java
+++ b/ratis-server/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java
@@ -48,6 +48,16 @@ public interface RaftServerConfigKeys {
setFiles(properties::setFiles, STORAGE_DIR_KEY, storageDir);
}
+ String SLEEP_DEVIATION_THRESHOLD = PREFIX + ".sleep.deviation.threshold";
+ int SLEEP_DEVIATION_THRESHOLD_DEFAULT = 300;
+ static int sleepDeviationThreshold(RaftProperties properties) {
+ return getInt(properties::getInt, SLEEP_DEVIATION_THRESHOLD,
+ SLEEP_DEVIATION_THRESHOLD_DEFAULT, getDefaultLog());
+ }
+ static void setSleepDeviationThreshold(RaftProperties properties, int
thresholdMs) {
+ setInt(properties::setInt, SLEEP_DEVIATION_THRESHOLD, thresholdMs);
+ }
+
/**
* When bootstrapping a new peer, If the gap between the match index of the
* peer and the leader's latest committed index is less than this gap, we
@@ -75,7 +85,6 @@ public interface RaftServerConfigKeys {
}
static void setLeaderElectionTimeout(RaftProperties properties, TimeDuration
leaderElectionTimeout) {
setTimeDuration(properties::setTimeDuration, LEADER_ELECTION_TIMEOUT_KEY,
leaderElectionTimeout);
-
}
interface Write {
diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/FollowerState.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/FollowerState.java
index 1d2afda..bbb7436 100644
--- a/ratis-server/src/main/java/org/apache/ratis/server/impl/FollowerState.java
+++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/FollowerState.java
@@ -18,6 +18,7 @@
package org.apache.ratis.server.impl;
import org.apache.ratis.util.Daemon;
+import org.apache.ratis.util.JavaUtils;
import org.apache.ratis.util.Timestamp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -88,10 +89,14 @@ class FollowerState extends Daemon {
@Override
public void run() {
+ long sleepDeviationThresholdMs = server.getSleepDeviationThresholdMs();
while (monitorRunning && server.isFollower()) {
final long electionTimeout = server.getRandomTimeoutMs();
try {
- Thread.sleep(electionTimeout);
+ if (!JavaUtils.sleep(electionTimeout, sleepDeviationThresholdMs)) {
+ continue;
+ }
+
if (!monitorRunning || !server.isFollower()) {
LOG.info("{} heartbeat monitor quit", server.getId());
break;
diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java
index 90241bd..200ac63 100644
--- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java
+++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java
@@ -72,6 +72,7 @@ public class RaftServerImpl implements RaftServerProtocol, RaftServerAsynchronou
private final int minTimeoutMs;
private final int maxTimeoutMs;
private final int rpcSlownessTimeoutMs;
+ private final int sleepDeviationThresholdMs;
private final boolean installSnapshotEnabled;
private final LifeCycle lifeCycle;
@@ -99,6 +100,7 @@ public class RaftServerImpl implements RaftServerProtocol, RaftServerAsynchronou
minTimeoutMs =
RaftServerConfigKeys.Rpc.timeoutMin(properties).toIntExact(TimeUnit.MILLISECONDS);
maxTimeoutMs =
RaftServerConfigKeys.Rpc.timeoutMax(properties).toIntExact(TimeUnit.MILLISECONDS);
rpcSlownessTimeoutMs =
RaftServerConfigKeys.Rpc.slownessTimeout(properties).toIntExact(TimeUnit.MILLISECONDS);
+ sleepDeviationThresholdMs =
RaftServerConfigKeys.sleepDeviationThreshold(properties);
installSnapshotEnabled =
RaftServerConfigKeys.Log.Appender.installSnapshotEnabled(properties);
Preconditions.assertTrue(maxTimeoutMs > minTimeoutMs,
"max timeout: %s, min timeout: %s", maxTimeoutMs, minTimeoutMs);
@@ -140,6 +142,10 @@ public class RaftServerImpl implements RaftServerProtocol, RaftServerAsynchronou
maxTimeoutMs - minTimeoutMs + 1);
}
+ int getSleepDeviationThresholdMs() {
+ return sleepDeviationThresholdMs;
+ }
+
public RaftGroupId getGroupId() {
return groupId;
}