This is an automated email from the ASF dual-hosted git repository.
symat pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/zookeeper.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new da9b18a ZOOKEEPER-3037: Add JVMPauseMonitor
da9b18a is described below
commit da9b18ae8f2bfed319e461d7e0ec8c2c2ac1590d
Author: Norbert Kalmar <[email protected]>
AuthorDate: Tue Feb 9 07:45:41 2021 +0000
ZOOKEEPER-3037: Add JVMPauseMonitor
Backporting https://issues.apache.org/jira/browse/ZOOKEEPER-3037 from
branch-3.6 to branch-3.5.
Author: Norbert Kalmar <nkalmaryahoo.com>
Reviewers: andorapache.org
Closes #904 from nkalmar/ZOOKEEPER-3037 and squashes the following commits:
a6105324b [Norbert Kalmar] ZOOKEEPER-3037 - add serviceStop() and improve
unit tests
7d0baaa46 [Norbert Kalmar] ZOOKEEPER-3037 - refactor unit tests
97d2c6103 [Norbert Kalmar] ZOOKEEPER-3037 - cite hadoop-common as source
3661389e4 [Norbert Kalmar] ZOOKEEPER-3037 - Add unit test and various
improvements
f30975765 [Norbert Kalmar] ZOOKEEPER-3037 - Add JvmPauseMonitor
(cherry picked from commit e9adf6ee09ef18258653d65c851fa84c3cd1a51d)
Author: Norbert Kalmar <[email protected]>
Reviewers: Enrico Olivelli <[email protected]>, Mate Szalay-Beko
<[email protected]>, Andor Molnar <[email protected]>
Closes #1594 from symat/ZOOKEEPER-3037-branch-3.5
---
.../org/apache/zookeeper/server/ServerConfig.java | 26 +++
.../apache/zookeeper/server/ZooKeeperServer.java | 27 +++
.../zookeeper/server/ZooKeeperServerMain.java | 10 +-
.../apache/zookeeper/server/quorum/QuorumPeer.java | 16 ++
.../zookeeper/server/quorum/QuorumPeerConfig.java | 39 ++++
.../zookeeper/server/quorum/QuorumPeerMain.java | 5 +
.../zookeeper/server/util/JvmPauseMonitor.java | 209 +++++++++++++++++++++
.../org/apache/zookeeper/ServerConfigTest.java | 23 +++
.../server/quorum/QuorumPeerConfigTest.java | 23 +++
.../zookeeper/server/util/JvmPauseMonitorTest.java | 75 ++++++++
10 files changed, 451 insertions(+), 2 deletions(-)
diff --git
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerConfig.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerConfig.java
index dd3f1da..6b3d1ed 100644
---
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerConfig.java
+++
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerConfig.java
@@ -49,6 +49,15 @@ public class ServerConfig {
/** defaults to -1 if not set explicitly */
protected int maxSessionTimeout = -1;
+ /** JVM Pause Monitor feature switch */
+ protected boolean jvmPauseMonitorToRun = false;
+ /** JVM Pause Monitor warn threshold in ms */
+ protected long jvmPauseWarnThresholdMs;
+ /** JVM Pause Monitor info threshold in ms */
+ protected long jvmPauseInfoThresholdMs;
+ /** JVM Pause Monitor sleep time in ms */
+ protected long jvmPauseSleepTimeMs;
+
/**
* Parse arguments for server configuration
* @param args clientPort dataDir and optional tickTime and maxClientCnxns
@@ -99,6 +108,10 @@ public class ServerConfig {
maxClientCnxns = config.getMaxClientCnxns();
minSessionTimeout = config.getMinSessionTimeout();
maxSessionTimeout = config.getMaxSessionTimeout();
+ jvmPauseMonitorToRun = config.isJvmPauseMonitorToRun();
+ jvmPauseInfoThresholdMs = config.getJvmPauseInfoThresholdMs();
+ jvmPauseWarnThresholdMs = config.getJvmPauseWarnThresholdMs();
+ jvmPauseSleepTimeMs = config.getJvmPauseSleepTimeMs();
}
public InetSocketAddress getClientPortAddress() {
@@ -115,4 +128,17 @@ public class ServerConfig {
public int getMinSessionTimeout() { return minSessionTimeout; }
/** maximum session timeout in milliseconds, -1 if unset */
public int getMaxSessionTimeout() { return maxSessionTimeout; }
+
+ /** JVM Pause Monitor info threshold in milliseconds */
+ public long getJvmPauseInfoThresholdMs() {
+ return jvmPauseInfoThresholdMs;
+ }
+ /** JVM Pause Monitor warn threshold in milliseconds */
+ public long getJvmPauseWarnThresholdMs() {
+ return jvmPauseWarnThresholdMs;
+ }
+ /** JVM Pause Monitor sleep time in milliseconds */
+ public long getJvmPauseSleepTimeMs() {
+ return jvmPauseSleepTimeMs;
+ }
+ /** JVM Pause Monitor feature switch, false unless set explicitly */
+ public boolean isJvmPauseMonitorToRun() {
+ return jvmPauseMonitorToRun;
+ }
}
diff --git
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
index 4e4feaf..61eb5e1 100644
---
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
+++
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
@@ -70,6 +70,7 @@ import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
import org.apache.zookeeper.server.quorum.ReadOnlyZooKeeperServer;
+import org.apache.zookeeper.server.util.JvmPauseMonitor;
import org.apache.zookeeper.txn.CreateSessionTxn;
import org.apache.zookeeper.txn.TxnHeader;
import org.slf4j.Logger;
@@ -109,6 +110,7 @@ public class ZooKeeperServer implements SessionExpirer,
ServerStats.Provider {
private final AtomicLong hzxid = new AtomicLong(0);
public final static Exception ok = new Exception("No prob");
protected RequestProcessor firstProcessor;
+ protected JvmPauseMonitor jvmPauseMonitor;
protected volatile State state = State.INITIAL;
protected boolean reconfigEnabled;
@@ -186,6 +188,20 @@ public class ZooKeeperServer implements SessionExpirer,
ServerStats.Provider {
}
/**
+ * Adds JvmPauseMonitor and calls
+ * {@link #ZooKeeperServer(FileTxnSnapLog, int, int, int, ZKDatabase,
boolean)}
+ *
+ */
+    public ZooKeeperServer(JvmPauseMonitor jvmPauseMonitor, FileTxnSnapLog txnLogFactory, int tickTime,
+                           int minSessionTimeout, int maxSessionTimeout, ZKDatabase zkDb,
+                           boolean reconfigEnabled) {
+        // All regular initialisation is done by the monitor-less constructor.
+        this(txnLogFactory, tickTime, minSessionTimeout, maxSessionTimeout, zkDb, reconfigEnabled);
+        this.jvmPauseMonitor = jvmPauseMonitor;
+        if (this.jvmPauseMonitor == null) {
+            // A null monitor means pause monitoring is switched off; nothing to report.
+            return;
+        }
+        LOG.info("Added JvmPauseMonitor to server");
+    }
+
+ /**
* creates a zookeeperserver instance.
* @param txnLogFactory the file transaction snapshot logging class
* @param tickTime the ticktime for the server
@@ -475,10 +491,18 @@ public class ZooKeeperServer implements SessionExpirer,
ServerStats.Provider {
registerJMX();
+ startJvmPauseMonitor();
+
setState(State.RUNNING);
notifyAll();
}
+    /** Starts the JVM pause monitor if one was supplied to this server; otherwise does nothing. */
+    protected void startJvmPauseMonitor() {
+        final JvmPauseMonitor monitor = this.jvmPauseMonitor;
+        if (monitor == null) {
+            return;
+        }
+        monitor.serviceStart();
+    }
+
protected void setupRequestProcessors() {
RequestProcessor finalProcessor = new FinalRequestProcessor(this);
RequestProcessor syncProcessor = new SyncRequestProcessor(this,
@@ -583,6 +607,9 @@ public class ZooKeeperServer implements SessionExpirer,
ServerStats.Provider {
if (firstProcessor != null) {
firstProcessor.shutdown();
}
+ if(jvmPauseMonitor != null) {
+ jvmPauseMonitor.serviceStop();
+ }
if (zkDb != null) {
if (fullyShutDown) {
diff --git
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServerMain.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServerMain.java
index 9dde512..df61d4c 100644
---
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServerMain.java
+++
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServerMain.java
@@ -33,6 +33,7 @@ import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog.DatadirException;
import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException;
+import org.apache.zookeeper.server.util.JvmPauseMonitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -123,8 +124,13 @@ public class ZooKeeperServerMain {
// run() in this thread.
// create a file logger url from the command line args
txnLog = new FileTxnSnapLog(config.dataLogDir, config.dataDir);
- final ZooKeeperServer zkServer = new ZooKeeperServer(txnLog,
- config.tickTime, config.minSessionTimeout,
config.maxSessionTimeout, null, QuorumPeerConfig.isReconfigEnabled());
+ JvmPauseMonitor jvmPauseMonitor = null;
+ if(config.jvmPauseMonitorToRun) {
+ jvmPauseMonitor = new JvmPauseMonitor(config);
+ }
+ final ZooKeeperServer zkServer = new
ZooKeeperServer(jvmPauseMonitor, txnLog,
+ config.tickTime, config.minSessionTimeout,
config.maxSessionTimeout,
+ null, QuorumPeerConfig.isReconfigEnabled());
txnLog.setServerStats(zkServer.serverStats());
// Registers shutdown handler which will be used to know the
diff --git
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
index 40a0405..474f70c 100644
---
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
+++
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
@@ -73,6 +73,7 @@ import
org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException;
import org.apache.zookeeper.server.quorum.flexible.QuorumMaj;
import org.apache.zookeeper.server.quorum.flexible.QuorumVerifier;
import org.apache.zookeeper.server.util.ConfigUtils;
+import org.apache.zookeeper.server.util.JvmPauseMonitor;
import org.apache.zookeeper.server.util.ZxidUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -130,6 +131,7 @@ public class QuorumPeer extends ZooKeeperThread implements
QuorumStats.Provider
*/
private ZKDatabase zkDb;
+ private JvmPauseMonitor jvmPauseMonitor;
public static final class AddressTuple {
public final InetSocketAddress quorumAddr;
public final InetSocketAddress electionAddr;
@@ -450,6 +452,10 @@ public class QuorumPeer extends ZooKeeperThread implements
QuorumStats.Provider
return getVotingView().size();
}
+ /**
+ * Sets the JVM pause monitor used by this peer. The monitor may be left
+ * unset (null), in which case no pause monitoring thread is started.
+ */
+ public void setJvmPauseMonitor(JvmPauseMonitor jvmPauseMonitor) {
+ this.jvmPauseMonitor = jvmPauseMonitor;
+ }
+
/**
* QuorumVerifier implementation; default (majority).
*/
@@ -896,6 +902,7 @@ public class QuorumPeer extends ZooKeeperThread implements
QuorumStats.Provider
System.out.println(e);
}
startLeaderElection();
+ startJvmPauseMonitor();
super.start();
}
@@ -974,6 +981,12 @@ public class QuorumPeer extends ZooKeeperThread implements
QuorumStats.Provider
this.electionAlg = createElectionAlgorithm(electionType);
}
+    /** Starts the JVM pause monitor if one has been set on this peer; otherwise does nothing. */
+    private void startJvmPauseMonitor() {
+        final JvmPauseMonitor monitor = this.jvmPauseMonitor;
+        if (monitor == null) {
+            return;
+        }
+        monitor.serviceStart();
+    }
+
/**
* Count the number of nodes in the map that could be followers.
* @param peers
@@ -1333,6 +1346,9 @@ public class QuorumPeer extends ZooKeeperThread
implements QuorumStats.Provider
if(udpSocket != null) {
udpSocket.close();
}
+ if(jvmPauseMonitor != null) {
+ jvmPauseMonitor.serviceStop();
+ }
try {
adminServer.shutdown();
diff --git
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java
index f05ab3f..2946871 100644
---
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java
+++
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java
@@ -39,6 +39,7 @@ import java.util.Map.Entry;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.common.ClientX509Util;
import org.apache.zookeeper.common.StringUtils;
+import org.apache.zookeeper.server.util.JvmPauseMonitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
@@ -117,6 +118,23 @@ public class QuorumPeerConfig {
*/
private final int MIN_SNAP_RETAIN_COUNT = 3;
+ /**
+ * JVM Pause Monitor feature switch
+ */
+ protected boolean jvmPauseMonitorToRun = false;
+ /**
+ * JVM Pause Monitor warn threshold in ms
+ */
+ protected long jvmPauseWarnThresholdMs =
JvmPauseMonitor.WARN_THRESHOLD_DEFAULT;
+ /**
+ * JVM Pause Monitor info threshold in ms
+ */
+ protected long jvmPauseInfoThresholdMs =
JvmPauseMonitor.INFO_THRESHOLD_DEFAULT;
+ /**
+ * JVM Pause Monitor sleep time in ms
+ */
+ protected long jvmPauseSleepTimeMs = JvmPauseMonitor.SLEEP_TIME_MS_DEFAULT;
+
@SuppressWarnings("serial")
public static class ConfigException extends Exception {
public ConfigException(String msg) {
@@ -331,6 +349,14 @@ public class QuorumPeerConfig {
quorumServicePrincipal = value;
} else if (key.equals("quorum.cnxn.threads.size")) {
quorumCnxnThreadsSize = Integer.parseInt(value);
+ } else if (key.equals(JvmPauseMonitor.INFO_THRESHOLD_KEY)) {
+ jvmPauseInfoThresholdMs = Long.parseLong(value);
+ } else if (key.equals(JvmPauseMonitor.WARN_THRESHOLD_KEY)) {
+ jvmPauseWarnThresholdMs = Long.parseLong(value);
+ } else if (key.equals(JvmPauseMonitor.SLEEP_TIME_MS_KEY)) {
+ jvmPauseSleepTimeMs = Long.parseLong(value);
+ } else if
(key.equals(JvmPauseMonitor.JVM_PAUSE_MONITOR_FEATURE_SWITCH_KEY)) {
+ jvmPauseMonitorToRun = Boolean.parseBoolean(value);
} else {
System.setProperty("zookeeper." + key, value);
}
@@ -790,6 +816,19 @@ public class QuorumPeerConfig {
return Collections.unmodifiableMap(quorumVerifier.getAllMembers());
}
+ /** JVM Pause Monitor info threshold in milliseconds */
+ public long getJvmPauseInfoThresholdMs() {
+ return jvmPauseInfoThresholdMs;
+ }
+ /** JVM Pause Monitor warn threshold in milliseconds */
+ public long getJvmPauseWarnThresholdMs() {
+ return jvmPauseWarnThresholdMs;
+ }
+ /** JVM Pause Monitor sleep time in milliseconds */
+ public long getJvmPauseSleepTimeMs() {
+ return jvmPauseSleepTimeMs;
+ }
+ /** JVM Pause Monitor feature switch, false unless enabled in the configuration */
+ public boolean isJvmPauseMonitorToRun() {
+ return jvmPauseMonitorToRun;
+ }
+
public long getServerId() { return serverId; }
public boolean isDistributed() {
diff --git
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerMain.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerMain.java
index a5f86a8..0f5ddca 100644
---
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerMain.java
+++
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerMain.java
@@ -23,6 +23,7 @@ import javax.management.JMException;
import javax.security.sasl.SaslException;
import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.zookeeper.server.util.JvmPauseMonitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.zookeeper.jmx.ManagedUtil;
@@ -202,6 +203,10 @@ public class QuorumPeerMain {
quorumPeer.setQuorumCnxnThreadsSize(config.quorumCnxnThreadsSize);
quorumPeer.initialize();
+ if(config.jvmPauseMonitorToRun) {
+ quorumPeer.setJvmPauseMonitor(new JvmPauseMonitor(config));
+ }
+
quorumPeer.start();
quorumPeer.join();
} catch (InterruptedException e) {
diff --git
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/util/JvmPauseMonitor.java
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/util/JvmPauseMonitor.java
new file mode 100644
index 0000000..0ce617f
--- /dev/null
+++
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/util/JvmPauseMonitor.java
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.util;
+
+import org.apache.zookeeper.server.ServerConfig;
+import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.management.GarbageCollectorMXBean;
+import java.lang.management.ManagementFactory;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.List;
+
+/**
+ * Detects long pauses of the JVM or of the host machine.
+ *
+ * This code is originally from hadoop-common, see:
+ * https://github.com/apache/hadoop/blob/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java
+ *
+ * Class which sets up a simple thread which runs in a loop sleeping
+ * for a short interval of time. If the sleep takes significantly longer
+ * than its target time, it implies that the JVM or host machine has
+ * paused processing, which may cause other problems. If such a pause is
+ * detected, the thread logs a message.
+ *
+ * The statistics counters are written only by the monitor thread and are
+ * declared volatile so that threads calling the getters observe up-to-date
+ * values. {@link #serviceStart()} and {@link #serviceStop()} are synchronized
+ * and may be called repeatedly.
+ */
+public class JvmPauseMonitor {
+    private static final Logger LOG = LoggerFactory.getLogger(JvmPauseMonitor.class);
+
+    public static final String JVM_PAUSE_MONITOR_FEATURE_SWITCH_KEY = "jvm.pause.monitor";
+
+    /** The target sleep time */
+    protected long sleepTimeMs;
+    public static final String SLEEP_TIME_MS_KEY = "jvm.pause.sleep.time.ms";
+    public static final long SLEEP_TIME_MS_DEFAULT = 500;
+
+    /** log WARN if we detect a pause longer than this threshold */
+    protected long warnThresholdMs;
+    public static final String WARN_THRESHOLD_KEY = "jvm.pause.warn-threshold.ms";
+    public static final long WARN_THRESHOLD_DEFAULT = 10000;
+
+    /** log INFO if we detect a pause longer than this threshold */
+    protected long infoThresholdMs;
+    public static final String INFO_THRESHOLD_KEY = "jvm.pause.info-threshold.ms";
+    public static final long INFO_THRESHOLD_DEFAULT = 1000;
+
+    private static final long NANOS_PER_MILLI = 1_000_000L;
+
+    // Written by the monitor thread only, read by arbitrary threads through the getters.
+    private volatile long numGcWarnThresholdExceeded = 0;
+    private volatile long numGcInfoThresholdExceeded = 0;
+    private volatile long totalGcExtraSleepTime = 0;
+
+    /** The running monitor thread, or null while the monitor is stopped. */
+    private volatile Thread monitorThread;
+    private volatile boolean shouldRun = true;
+
+    /**
+     * Creates a monitor using the thresholds and sleep time of a quorum peer configuration.
+     *
+     * @param config source of the warn/info thresholds and the sleep time (all in ms)
+     */
+    public JvmPauseMonitor(QuorumPeerConfig config) {
+        this.warnThresholdMs = config.getJvmPauseWarnThresholdMs();
+        this.infoThresholdMs = config.getJvmPauseInfoThresholdMs();
+        this.sleepTimeMs = config.getJvmPauseSleepTimeMs();
+    }
+
+    /**
+     * Creates a monitor using the thresholds and sleep time of a standalone server configuration.
+     *
+     * @param config source of the warn/info thresholds and the sleep time (all in ms)
+     */
+    public JvmPauseMonitor(ServerConfig config) {
+        this.warnThresholdMs = config.getJvmPauseWarnThresholdMs();
+        this.infoThresholdMs = config.getJvmPauseInfoThresholdMs();
+        this.sleepTimeMs = config.getJvmPauseSleepTimeMs();
+    }
+
+    /**
+     * Starts the daemon monitor thread. Calling this while the monitor is
+     * already running is a no-op, so at most one monitor thread exists.
+     */
+    public synchronized void serviceStart() {
+        if (monitorThread != null) {
+            return;
+        }
+        // Re-arm the flag so that the monitor can be started again after serviceStop().
+        shouldRun = true;
+        Thread thread = new Thread(new JVMMonitor());
+        thread.setDaemon(true);
+        thread.start();
+        monitorThread = thread;
+    }
+
+    /**
+     * Stops the monitor thread, if any, and waits for it to terminate. If the
+     * calling thread is interrupted while waiting, its interrupt status is restored.
+     */
+    public synchronized void serviceStop() {
+        shouldRun = false;
+        Thread thread = monitorThread;
+        if (thread == null) {
+            return;
+        }
+        monitorThread = null;
+        // Wake the thread up if it is currently sleeping.
+        thread.interrupt();
+        try {
+            thread.join();
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+        }
+    }
+
+    /**
+     * @return true between a call to {@link #serviceStart()} and the next {@link #serviceStop()}
+     */
+    public boolean isStarted() {
+        return monitorThread != null;
+    }
+
+    /** @return number of detected pauses that exceeded the warn threshold */
+    public long getNumGcWarnThresholdExceeded() {
+        return numGcWarnThresholdExceeded;
+    }
+
+    /** @return number of detected pauses that exceeded the info (but not the warn) threshold */
+    public long getNumGcInfoThresholdExceeded() {
+        return numGcInfoThresholdExceeded;
+    }
+
+    /** @return sum, in ms, of the extra sleep time measured over all monitor iterations */
+    public long getTotalGcExtraSleepTime() {
+        return totalGcExtraSleepTime;
+    }
+
+    /**
+     * Builds the log message for a detected pause, listing every GC pool
+     * that ran at least one collection between the two snapshots.
+     */
+    private String formatMessage(long extraSleepTime,
+                                 Map<String, GcTimes> gcTimesAfterSleep,
+                                 Map<String, GcTimes> gcTimesBeforeSleep) {
+
+        // Only pools present in both snapshots can be diffed.
+        Set<String> gcBeanNames = new HashSet<>(gcTimesAfterSleep.keySet());
+        gcBeanNames.retainAll(gcTimesBeforeSleep.keySet());
+        List<String> gcDiffs = new ArrayList<>();
+
+        for (String name : gcBeanNames) {
+            GcTimes diff = gcTimesAfterSleep.get(name).subtract(gcTimesBeforeSleep.get(name));
+            if (diff.gcCount != 0) {
+                gcDiffs.add("GC pool '" + name + "' had collection(s): " + diff.toString());
+            }
+        }
+
+        String ret = String.format("Detected pause in JVM or host machine (eg GC): pause of approximately %d ms, "
+                        + "total pause: info level: %d, warn level: %d %n",
+                extraSleepTime, numGcInfoThresholdExceeded, numGcWarnThresholdExceeded);
+        if (gcDiffs.isEmpty()) {
+            ret += ("No GCs detected");
+        } else {
+            ret += String.join("\n", gcDiffs);
+        }
+        return ret;
+    }
+
+    /** Takes a snapshot of collection count and time for every garbage collector MXBean, keyed by name. */
+    private Map<String, GcTimes> getGcTimes() {
+        Map<String, GcTimes> map = new HashMap<>();
+        List<GarbageCollectorMXBean> gcBeans = ManagementFactory.getGarbageCollectorMXBeans();
+        for (GarbageCollectorMXBean gcBean : gcBeans) {
+            map.put(gcBean.getName(), new GcTimes(gcBean));
+        }
+        return map;
+    }
+
+    /** Immutable pair of collection count and accumulated collection time of one GC pool. */
+    private static class GcTimes {
+
+        private final long gcCount;
+        private final long gcTimeMillis;
+
+        private GcTimes(GarbageCollectorMXBean gcBean) {
+            gcCount = gcBean.getCollectionCount();
+            gcTimeMillis = gcBean.getCollectionTime();
+        }
+
+        private GcTimes(long count, long time) {
+            this.gcCount = count;
+            this.gcTimeMillis = time;
+        }
+
+        private GcTimes subtract(GcTimes other) {
+            return new GcTimes(this.gcCount - other.gcCount,
+                    this.gcTimeMillis - other.gcTimeMillis);
+        }
+
+        @Override
+        public String toString() {
+            return "count=" + gcCount + " time=" + gcTimeMillis + "ms";
+        }
+
+    }
+
+    /** The sleep-and-measure loop executed by the monitor thread. */
+    private class JVMMonitor implements Runnable {
+        @Override
+        public void run() {
+            Map<String, GcTimes> gcTimesBeforeSleep = getGcTimes();
+            LOG.info("Starting JVM Pause Monitor with infoThresholdMs:{} warnThresholdMs:{} and sleepTimeMs:{}",
+                    infoThresholdMs, warnThresholdMs, sleepTimeMs);
+            while (shouldRun) {
+                // System.nanoTime() is monotonic; a wall-clock adjustment (e.g. NTP step)
+                // must neither be reported as a pause nor hide a real one.
+                final long startNanos = System.nanoTime();
+                try {
+                    Thread.sleep(sleepTimeMs);
+                } catch (InterruptedException ie) {
+                    // serviceStop() interrupts the sleep to request termination.
+                    return;
+                }
+                final long elapsedMs = (System.nanoTime() - startNanos) / NANOS_PER_MILLI;
+                final long extraSleepTime = elapsedMs - sleepTimeMs;
+                Map<String, GcTimes> gcTimesAfterSleep = getGcTimes();
+
+                // Counters are incremented before formatting so the message includes this pause.
+                if (extraSleepTime > warnThresholdMs) {
+                    numGcWarnThresholdExceeded = numGcWarnThresholdExceeded + 1;
+                    LOG.warn(formatMessage(extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep));
+                } else if (extraSleepTime > infoThresholdMs) {
+                    numGcInfoThresholdExceeded = numGcInfoThresholdExceeded + 1;
+                    LOG.info(formatMessage(extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep));
+                }
+                totalGcExtraSleepTime = totalGcExtraSleepTime + extraSleepTime;
+                gcTimesBeforeSleep = gcTimesAfterSleep;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git
a/zookeeper-server/src/test/java/org/apache/zookeeper/ServerConfigTest.java
b/zookeeper-server/src/test/java/org/apache/zookeeper/ServerConfigTest.java
index 27faa74..e542670 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/ServerConfigTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/ServerConfigTest.java
@@ -19,12 +19,15 @@
package org.apache.zookeeper;
import org.apache.zookeeper.server.ServerConfig;
+import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
import java.io.File;
@@ -60,6 +63,26 @@ public class ServerConfigTest {
serverConfig.parse(args);
}
+    /** Verifies that readFrom() copies all JVM pause monitor settings from a QuorumPeerConfig. */
+    @Test
+    public void testJvmPauseMonitorConfigured() {
+        final long expectedSleepTimeMs = 444L;
+        final long expectedWarnThresholdMs = 5555L;
+        final long expectedInfoThresholdMs = 555L;
+
+        QuorumPeerConfig mockedQuorumConfig = mock(QuorumPeerConfig.class);
+        when(mockedQuorumConfig.getJvmPauseInfoThresholdMs()).thenReturn(expectedInfoThresholdMs);
+        when(mockedQuorumConfig.getJvmPauseWarnThresholdMs()).thenReturn(expectedWarnThresholdMs);
+        when(mockedQuorumConfig.getJvmPauseSleepTimeMs()).thenReturn(expectedSleepTimeMs);
+        when(mockedQuorumConfig.isJvmPauseMonitorToRun()).thenReturn(true);
+
+        serverConfig.readFrom(mockedQuorumConfig);
+
+        assertEquals(expectedSleepTimeMs, serverConfig.getJvmPauseSleepTimeMs());
+        assertEquals(expectedWarnThresholdMs, serverConfig.getJvmPauseWarnThresholdMs());
+        assertEquals(expectedInfoThresholdMs, serverConfig.getJvmPauseInfoThresholdMs());
+        assertTrue(serverConfig.isJvmPauseMonitorToRun());
+    }
+
boolean checkEquality(String a, String b) {
assertNotNull(a);
assertNotNull(b);
diff --git
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/QuorumPeerConfigTest.java
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/QuorumPeerConfigTest.java
index 5dfd2d0..d56a618 100644
---
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/QuorumPeerConfigTest.java
+++
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/QuorumPeerConfigTest.java
@@ -20,6 +20,7 @@ package org.apache.zookeeper.server.quorum;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
@@ -115,6 +116,28 @@ public class QuorumPeerConfigTest {
quorumPeerConfig.parseProperties(zkProp);
}
+    /** Verifies that the jvm.pause.* properties are parsed into the matching config fields. */
+    @Test
+    public void testJvmPauseMonitorConfigured() throws IOException, ConfigException {
+        final long expectedSleepTimeMs = 444L;
+        final long expectedWarnThresholdMs = 5555L;
+        final long expectedInfoThresholdMs = 555L;
+
+        Properties zkProp = getDefaultZKProperties();
+        zkProp.setProperty("dataDir", new File("myDataDir").getAbsolutePath());
+        zkProp.setProperty("jvm.pause.monitor", "true");
+        zkProp.setProperty("jvm.pause.sleep.time.ms", Long.toString(expectedSleepTimeMs));
+        zkProp.setProperty("jvm.pause.warn-threshold.ms", Long.toString(expectedWarnThresholdMs));
+        zkProp.setProperty("jvm.pause.info-threshold.ms", Long.toString(expectedInfoThresholdMs));
+
+        QuorumPeerConfig quorumPeerConfig = new QuorumPeerConfig();
+        quorumPeerConfig.parseProperties(zkProp);
+
+        assertEquals(expectedSleepTimeMs, quorumPeerConfig.getJvmPauseSleepTimeMs());
+        assertEquals(expectedWarnThresholdMs, quorumPeerConfig.getJvmPauseWarnThresholdMs());
+        assertEquals(expectedInfoThresholdMs, quorumPeerConfig.getJvmPauseInfoThresholdMs());
+        assertTrue(quorumPeerConfig.isJvmPauseMonitorToRun());
+    }
+
private Properties getDefaultZKProperties() {
Properties zkProp = new Properties();
zkProp.setProperty("dataDir", new File("myDataDir").getAbsolutePath());
diff --git
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/util/JvmPauseMonitorTest.java
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/util/JvmPauseMonitorTest.java
new file mode 100644
index 0000000..4e701b8
--- /dev/null
+++
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/util/JvmPauseMonitorTest.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.util;
+
+import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests that {@link JvmPauseMonitor} counts pauses against the info and warn
+ * thresholds. Thresholds of -1 ms make every monitor iteration count as a
+ * pause, so no real JVM pause has to be provoked.
+ */
+public class JvmPauseMonitorTest {
+
+    private final Long sleepTime = 100L;
+    private final Long infoTH = -1L;
+    private final Long warnTH = -1L;
+    private JvmPauseMonitor pauseMonitor;
+
+    @Test(timeout=5000)
+    public void testJvmPauseMonitorExceedInfoThreshold() throws InterruptedException {
+        QuorumPeerConfig qpConfig = mock(QuorumPeerConfig.class);
+        when(qpConfig.getJvmPauseSleepTimeMs()).thenReturn(sleepTime);
+        when(qpConfig.getJvmPauseInfoThresholdMs()).thenReturn(infoTH);
+        // An unstubbed warn threshold would be 0 on the mock; since the monitor checks the
+        // warn threshold first, any sleep overshoot of 1 ms or more would then be counted
+        // as WARN and the info counter might never move. Put WARN out of reach instead.
+        when(qpConfig.getJvmPauseWarnThresholdMs()).thenReturn(Long.MAX_VALUE);
+
+        pauseMonitor = new JvmPauseMonitor(qpConfig);
+        pauseMonitor.serviceStart();
+
+        Assert.assertEquals(sleepTime, Long.valueOf(pauseMonitor.sleepTimeMs));
+        Assert.assertEquals(infoTH, Long.valueOf(pauseMonitor.infoThresholdMs));
+
+        // The @Test timeout bounds this loop if the counter never increases.
+        while(pauseMonitor.getNumGcInfoThresholdExceeded() == 0) {
+            Thread.sleep(200);
+        }
+    }
+
+    @Test(timeout=5000)
+    public void testJvmPauseMonitorExceedWarnThreshold() throws InterruptedException {
+        QuorumPeerConfig qpConfig = mock(QuorumPeerConfig.class);
+        when(qpConfig.getJvmPauseSleepTimeMs()).thenReturn(sleepTime);
+        when(qpConfig.getJvmPauseWarnThresholdMs()).thenReturn(warnTH);
+
+        pauseMonitor = new JvmPauseMonitor(qpConfig);
+        pauseMonitor.serviceStart();
+
+        Assert.assertEquals(sleepTime, Long.valueOf(pauseMonitor.sleepTimeMs));
+        Assert.assertEquals(warnTH, Long.valueOf(pauseMonitor.warnThresholdMs));
+
+        // The @Test timeout bounds this loop if the counter never increases.
+        while(pauseMonitor.getNumGcWarnThresholdExceeded() == 0) {
+            Thread.sleep(200);
+        }
+
+    }
+
+    @After
+    public void teardown() {
+        // A test may fail before the monitor is created; do not mask that failure with an NPE.
+        if (pauseMonitor != null) {
+            pauseMonitor.serviceStop();
+        }
+    }
+}