This is an automated email from the ASF dual-hosted git repository.

symat pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/zookeeper.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new da9b18a  ZOOKEEPER-3037: Add JVMPauseMonitor
da9b18a is described below

commit da9b18ae8f2bfed319e461d7e0ec8c2c2ac1590d
Author: Norbert Kalmar <[email protected]>
AuthorDate: Tue Feb 9 07:45:41 2021 +0000

    ZOOKEEPER-3037: Add JVMPauseMonitor
    
    Backporting https://issues.apache.org/jira/browse/ZOOKEEPER-3037 from branch-3.6 to branch-3.5.
    
    Author: Norbert Kalmar <nkalmar@yahoo.com>
    
    Reviewers: andor@apache.org
    
    Closes #904 from nkalmar/ZOOKEEPER-3037 and squashes the following commits:
    
    a6105324b [Norbert Kalmar] ZOOKEEPER-3037 - add serviceStop() and improve unit tests
    7d0baaa46 [Norbert Kalmar] ZOOKEEPER-3037 - refactor unit tests
    97d2c6103 [Norbert Kalmar] ZOOKEEPER-3037 - cite hadoop-common as source
    3661389e4 [Norbert Kalmar] ZOOKEEPER-3037 - Add unit test and various improvements
    f30975765 [Norbert Kalmar] ZOOKEEPER-3037 - Add JvmPauseMonitor
    
    (cherry picked from commit e9adf6ee09ef18258653d65c851fa84c3cd1a51d)
    
    Author: Norbert Kalmar <[email protected]>
    
    Reviewers: Enrico Olivelli <[email protected]>, Mate Szalay-Beko <[email protected]>, Andor Molnar <[email protected]>
    
    Closes #1594 from symat/ZOOKEEPER-3037-branch-3.5
---
 .../org/apache/zookeeper/server/ServerConfig.java  |  26 +++
 .../apache/zookeeper/server/ZooKeeperServer.java   |  27 +++
 .../zookeeper/server/ZooKeeperServerMain.java      |  10 +-
 .../apache/zookeeper/server/quorum/QuorumPeer.java |  16 ++
 .../zookeeper/server/quorum/QuorumPeerConfig.java  |  39 ++++
 .../zookeeper/server/quorum/QuorumPeerMain.java    |   5 +
 .../zookeeper/server/util/JvmPauseMonitor.java     | 209 +++++++++++++++++++++
 .../org/apache/zookeeper/ServerConfigTest.java     |  23 +++
 .../server/quorum/QuorumPeerConfigTest.java        |  23 +++
 .../zookeeper/server/util/JvmPauseMonitorTest.java |  75 ++++++++
 10 files changed, 451 insertions(+), 2 deletions(-)

diff --git 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerConfig.java 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerConfig.java
index dd3f1da..6b3d1ed 100644
--- 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerConfig.java
+++ 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ServerConfig.java
@@ -49,6 +49,15 @@ public class ServerConfig {
     /** defaults to -1 if not set explicitly */
     protected int maxSessionTimeout = -1;
 
+    /** JVM Pause Monitor feature switch */
+    protected boolean jvmPauseMonitorToRun = false;
+    /** JVM Pause Monitor warn threshold in ms */
+    protected long jvmPauseWarnThresholdMs;
+    /** JVM Pause Monitor info threshold in ms */
+    protected long jvmPauseInfoThresholdMs;
+    /** JVM Pause Monitor sleep time in ms */
+    protected long jvmPauseSleepTimeMs;
+
     /**
      * Parse arguments for server configuration
      * @param args clientPort dataDir and optional tickTime and maxClientCnxns
@@ -99,6 +108,10 @@ public class ServerConfig {
         maxClientCnxns = config.getMaxClientCnxns();
         minSessionTimeout = config.getMinSessionTimeout();
         maxSessionTimeout = config.getMaxSessionTimeout();
+        jvmPauseMonitorToRun = config.isJvmPauseMonitorToRun();
+        jvmPauseInfoThresholdMs = config.getJvmPauseInfoThresholdMs();
+        jvmPauseWarnThresholdMs = config.getJvmPauseWarnThresholdMs();
+        jvmPauseSleepTimeMs = config.getJvmPauseSleepTimeMs();
     }
 
     public InetSocketAddress getClientPortAddress() {
@@ -115,4 +128,17 @@ public class ServerConfig {
     public int getMinSessionTimeout() { return minSessionTimeout; }
     /** maximum session timeout in milliseconds, -1 if unset */
     public int getMaxSessionTimeout() { return maxSessionTimeout; }
+
+    public long getJvmPauseInfoThresholdMs() {
+        return jvmPauseInfoThresholdMs;
+    }
+    public long getJvmPauseWarnThresholdMs() {
+        return jvmPauseWarnThresholdMs;
+    }
+    public long getJvmPauseSleepTimeMs() {
+        return jvmPauseSleepTimeMs;
+    }
+    public boolean isJvmPauseMonitorToRun() {
+        return jvmPauseMonitorToRun;
+    }
 }
diff --git 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
index 4e4feaf..61eb5e1 100644
--- 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
+++ 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
@@ -70,6 +70,7 @@ import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
 import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
 import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
 import org.apache.zookeeper.server.quorum.ReadOnlyZooKeeperServer;
+import org.apache.zookeeper.server.util.JvmPauseMonitor;
 import org.apache.zookeeper.txn.CreateSessionTxn;
 import org.apache.zookeeper.txn.TxnHeader;
 import org.slf4j.Logger;
@@ -109,6 +110,7 @@ public class ZooKeeperServer implements SessionExpirer, 
ServerStats.Provider {
     private final AtomicLong hzxid = new AtomicLong(0);
     public final static Exception ok = new Exception("No prob");
     protected RequestProcessor firstProcessor;
+    protected JvmPauseMonitor jvmPauseMonitor;
     protected volatile State state = State.INITIAL;
     protected boolean reconfigEnabled;
 
@@ -186,6 +188,20 @@ public class ZooKeeperServer implements SessionExpirer, 
ServerStats.Provider {
     }
 
     /**
+     * Adds JvmPauseMonitor and calls
+     * {@link #ZooKeeperServer(FileTxnSnapLog, int, int, int, ZKDatabase, 
boolean)}
+     *
+     */
+    public ZooKeeperServer(JvmPauseMonitor jvmPauseMonitor, FileTxnSnapLog 
txnLogFactory, int tickTime,
+                           int minSessionTimeout, int maxSessionTimeout, 
ZKDatabase zkDb, boolean reconfigEnabled) {
+        this(txnLogFactory, tickTime, minSessionTimeout, maxSessionTimeout, 
zkDb, reconfigEnabled);
+        this.jvmPauseMonitor = jvmPauseMonitor;
+        if(jvmPauseMonitor != null) {
+            LOG.info("Added JvmPauseMonitor to server");
+        }
+    }
+
+    /**
      * creates a zookeeperserver instance.
      * @param txnLogFactory the file transaction snapshot logging class
      * @param tickTime the ticktime for the server
@@ -475,10 +491,18 @@ public class ZooKeeperServer implements SessionExpirer, 
ServerStats.Provider {
 
         registerJMX();
 
+        startJvmPauseMonitor();
+
         setState(State.RUNNING);
         notifyAll();
     }
 
+    protected void startJvmPauseMonitor() {
+        if (this.jvmPauseMonitor != null) {
+            this.jvmPauseMonitor.serviceStart();
+        }
+    }
+
     protected void setupRequestProcessors() {
         RequestProcessor finalProcessor = new FinalRequestProcessor(this);
         RequestProcessor syncProcessor = new SyncRequestProcessor(this,
@@ -583,6 +607,9 @@ public class ZooKeeperServer implements SessionExpirer, 
ServerStats.Provider {
         if (firstProcessor != null) {
             firstProcessor.shutdown();
         }
+        if(jvmPauseMonitor != null) {
+            jvmPauseMonitor.serviceStop();
+        }
 
         if (zkDb != null) {
             if (fullyShutDown) {
diff --git 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServerMain.java
 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServerMain.java
index 9dde512..df61d4c 100644
--- 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServerMain.java
+++ 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServerMain.java
@@ -33,6 +33,7 @@ import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
 import org.apache.zookeeper.server.persistence.FileTxnSnapLog.DatadirException;
 import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
 import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException;
+import org.apache.zookeeper.server.util.JvmPauseMonitor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -123,8 +124,13 @@ public class ZooKeeperServerMain {
             // run() in this thread.
             // create a file logger url from the command line args
             txnLog = new FileTxnSnapLog(config.dataLogDir, config.dataDir);
-            final ZooKeeperServer zkServer = new ZooKeeperServer(txnLog,
-                    config.tickTime, config.minSessionTimeout, 
config.maxSessionTimeout, null, QuorumPeerConfig.isReconfigEnabled());
+            JvmPauseMonitor jvmPauseMonitor = null;
+            if(config.jvmPauseMonitorToRun) {
+                jvmPauseMonitor = new JvmPauseMonitor(config);
+            }
+            final ZooKeeperServer zkServer = new 
ZooKeeperServer(jvmPauseMonitor, txnLog,
+                    config.tickTime, config.minSessionTimeout, 
config.maxSessionTimeout,
+                    null, QuorumPeerConfig.isReconfigEnabled());
             txnLog.setServerStats(zkServer.serverStats());
 
             // Registers shutdown handler which will be used to know the
diff --git 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
index 40a0405..474f70c 100644
--- 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
+++ 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeer.java
@@ -73,6 +73,7 @@ import 
org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException;
 import org.apache.zookeeper.server.quorum.flexible.QuorumMaj;
 import org.apache.zookeeper.server.quorum.flexible.QuorumVerifier;
 import org.apache.zookeeper.server.util.ConfigUtils;
+import org.apache.zookeeper.server.util.JvmPauseMonitor;
 import org.apache.zookeeper.server.util.ZxidUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -130,6 +131,7 @@ public class QuorumPeer extends ZooKeeperThread implements 
QuorumStats.Provider
      */
     private ZKDatabase zkDb;
 
+    private JvmPauseMonitor jvmPauseMonitor;
     public static final class AddressTuple {
         public final InetSocketAddress quorumAddr;
         public final InetSocketAddress electionAddr;
@@ -450,6 +452,10 @@ public class QuorumPeer extends ZooKeeperThread implements 
QuorumStats.Provider
         return getVotingView().size();
     }
 
+    public void setJvmPauseMonitor(JvmPauseMonitor jvmPauseMonitor) {
+        this.jvmPauseMonitor = jvmPauseMonitor;
+    }
+
     /**
      * QuorumVerifier implementation; default (majority).
      */
@@ -896,6 +902,7 @@ public class QuorumPeer extends ZooKeeperThread implements 
QuorumStats.Provider
             System.out.println(e);
         }
         startLeaderElection();
+        startJvmPauseMonitor();
         super.start();
     }
 
@@ -974,6 +981,12 @@ public class QuorumPeer extends ZooKeeperThread implements 
QuorumStats.Provider
         this.electionAlg = createElectionAlgorithm(electionType);
     }
 
+    private void startJvmPauseMonitor() {
+        if (this.jvmPauseMonitor != null) {
+            this.jvmPauseMonitor.serviceStart();
+        }
+    }
+
     /**
      * Count the number of nodes in the map that could be followers.
      * @param peers
@@ -1333,6 +1346,9 @@ public class QuorumPeer extends ZooKeeperThread 
implements QuorumStats.Provider
         if(udpSocket != null) {
             udpSocket.close();
         }
+        if(jvmPauseMonitor != null) {
+            jvmPauseMonitor.serviceStop();
+        }
 
         try {
             adminServer.shutdown();
diff --git 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java
 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java
index f05ab3f..2946871 100644
--- 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java
+++ 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerConfig.java
@@ -39,6 +39,7 @@ import java.util.Map.Entry;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.zookeeper.common.ClientX509Util;
 import org.apache.zookeeper.common.StringUtils;
+import org.apache.zookeeper.server.util.JvmPauseMonitor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.slf4j.MDC;
@@ -117,6 +118,23 @@ public class QuorumPeerConfig {
      */
     private final int MIN_SNAP_RETAIN_COUNT = 3;
 
+    /**
+     * JVM Pause Monitor feature switch
+     */
+    protected boolean jvmPauseMonitorToRun = false;
+    /**
+     * JVM Pause Monitor warn threshold in ms
+     */
+    protected long jvmPauseWarnThresholdMs = 
JvmPauseMonitor.WARN_THRESHOLD_DEFAULT;
+    /**
+     * JVM Pause Monitor info threshold in ms
+     */
+    protected long jvmPauseInfoThresholdMs = 
JvmPauseMonitor.INFO_THRESHOLD_DEFAULT;
+    /**
+     * JVM Pause Monitor sleep time in ms
+     */
+    protected long jvmPauseSleepTimeMs = JvmPauseMonitor.SLEEP_TIME_MS_DEFAULT;
+
     @SuppressWarnings("serial")
     public static class ConfigException extends Exception {
         public ConfigException(String msg) {
@@ -331,6 +349,14 @@ public class QuorumPeerConfig {
                 quorumServicePrincipal = value;
             } else if (key.equals("quorum.cnxn.threads.size")) {
                 quorumCnxnThreadsSize = Integer.parseInt(value);
+            } else if (key.equals(JvmPauseMonitor.INFO_THRESHOLD_KEY)) {
+                jvmPauseInfoThresholdMs = Long.parseLong(value);
+            } else if (key.equals(JvmPauseMonitor.WARN_THRESHOLD_KEY)) {
+                jvmPauseWarnThresholdMs = Long.parseLong(value);
+            } else if (key.equals(JvmPauseMonitor.SLEEP_TIME_MS_KEY)) {
+                jvmPauseSleepTimeMs = Long.parseLong(value);
+            } else if 
(key.equals(JvmPauseMonitor.JVM_PAUSE_MONITOR_FEATURE_SWITCH_KEY)) {
+                jvmPauseMonitorToRun = Boolean.parseBoolean(value);
             } else {
                 System.setProperty("zookeeper." + key, value);
             }
@@ -790,6 +816,19 @@ public class QuorumPeerConfig {
         return Collections.unmodifiableMap(quorumVerifier.getAllMembers());
     }
 
+    public long getJvmPauseInfoThresholdMs() {
+        return jvmPauseInfoThresholdMs;
+    }
+    public long getJvmPauseWarnThresholdMs() {
+        return jvmPauseWarnThresholdMs;
+    }
+    public long getJvmPauseSleepTimeMs() {
+        return jvmPauseSleepTimeMs;
+    }
+    public boolean isJvmPauseMonitorToRun() {
+        return jvmPauseMonitorToRun;
+    }
+
     public long getServerId() { return serverId; }
 
     public boolean isDistributed() {
diff --git 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerMain.java
 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerMain.java
index a5f86a8..0f5ddca 100644
--- 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerMain.java
+++ 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumPeerMain.java
@@ -23,6 +23,7 @@ import javax.management.JMException;
 import javax.security.sasl.SaslException;
 
 import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.zookeeper.server.util.JvmPauseMonitor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.zookeeper.jmx.ManagedUtil;
@@ -202,6 +203,10 @@ public class QuorumPeerMain {
           quorumPeer.setQuorumCnxnThreadsSize(config.quorumCnxnThreadsSize);
           quorumPeer.initialize();
 
+          if(config.jvmPauseMonitorToRun) {
+              quorumPeer.setJvmPauseMonitor(new JvmPauseMonitor(config));
+          }
+
           quorumPeer.start();
           quorumPeer.join();
       } catch (InterruptedException e) {
diff --git 
a/zookeeper-server/src/main/java/org/apache/zookeeper/server/util/JvmPauseMonitor.java
 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/util/JvmPauseMonitor.java
new file mode 100644
index 0000000..0ce617f
--- /dev/null
+++ 
b/zookeeper-server/src/main/java/org/apache/zookeeper/server/util/JvmPauseMonitor.java
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.util;
+
+import org.apache.zookeeper.server.ServerConfig;
+import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.management.GarbageCollectorMXBean;
+import java.lang.management.ManagementFactory;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.List;
+
+/**
+ * This code is originally from hadoop-common, see:
+ * 
https://github.com/apache/hadoop/blob/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java
+ *
+ * Class which sets up a simple thread which runs in a loop sleeping
+ * for a short interval of time. If the sleep takes significantly longer
+ * than its target time, it implies that the JVM or host machine has
+ * paused processing, which may cause other problems. If such a pause is
+ * detected, the thread logs a message.
+ */
+public class JvmPauseMonitor {
+    private static final Logger LOG = 
LoggerFactory.getLogger(JvmPauseMonitor.class);
+
+    public static final String JVM_PAUSE_MONITOR_FEATURE_SWITCH_KEY = 
"jvm.pause.monitor";
+
+    /** The target sleep time */
+    protected long sleepTimeMs;
+    public static final String SLEEP_TIME_MS_KEY = "jvm.pause.sleep.time.ms";
+    public static final long SLEEP_TIME_MS_DEFAULT = 500;
+
+    /** log WARN if we detect a pause longer than this threshold */
+    protected long warnThresholdMs;
+    public static final String WARN_THRESHOLD_KEY = 
"jvm.pause.warn-threshold.ms";
+    public static final long WARN_THRESHOLD_DEFAULT = 10000;
+
+    /** log INFO if we detect a pause longer than this threshold */
+    protected long infoThresholdMs;
+    public static final String INFO_THRESHOLD_KEY = 
"jvm.pause.info-threshold.ms";
+    public static final long INFO_THRESHOLD_DEFAULT = 1000;
+
+    private long numGcWarnThresholdExceeded = 0;
+    private long numGcInfoThresholdExceeded = 0;
+    private long totalGcExtraSleepTime = 0;
+
+    private Thread monitorThread;
+    private volatile boolean shouldRun = true;
+
+    public JvmPauseMonitor(QuorumPeerConfig config) {
+        this.warnThresholdMs = config.getJvmPauseWarnThresholdMs();
+        this.infoThresholdMs = config.getJvmPauseInfoThresholdMs();
+        this.sleepTimeMs = config.getJvmPauseSleepTimeMs();
+    }
+
+    public JvmPauseMonitor(ServerConfig config) {
+        this.warnThresholdMs = config.getJvmPauseWarnThresholdMs();
+        this.infoThresholdMs = config.getJvmPauseInfoThresholdMs();
+        this.sleepTimeMs = config.getJvmPauseSleepTimeMs();
+    }
+
+    public void serviceStart() {
+        monitorThread = new Thread(new JVMMonitor());
+        monitorThread.setDaemon(true);
+        monitorThread.start();
+    }
+
+    public void serviceStop() {
+        shouldRun = false;
+        if (monitorThread != null) {
+            monitorThread.interrupt();
+            try {
+                monitorThread.join();
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+            }
+        }
+    }
+
+    public boolean isStarted() {
+        return monitorThread != null;
+    }
+
+    public long getNumGcWarnThresholdExceeded() {
+        return numGcWarnThresholdExceeded;
+    }
+
+    public long getNumGcInfoThresholdExceeded() {
+        return numGcInfoThresholdExceeded;
+    }
+
+    public long getTotalGcExtraSleepTime() {
+        return totalGcExtraSleepTime;
+    }
+
+    private String formatMessage(long extraSleepTime,
+                                 Map<String, GcTimes> gcTimesAfterSleep,
+                                 Map<String, GcTimes> gcTimesBeforeSleep) {
+
+        Set<String> gcBeanNames = new HashSet<>(gcTimesAfterSleep.keySet());
+        gcBeanNames.retainAll(gcTimesBeforeSleep.keySet());
+        List<String> gcDiffs = new ArrayList<>();
+
+        for (String name : gcBeanNames) {
+            GcTimes diff = 
gcTimesAfterSleep.get(name).subtract(gcTimesBeforeSleep.get(name));
+            if (diff.gcCount != 0) {
+                gcDiffs.add("GC pool '" + name + "' had collection(s): " + 
diff.toString());
+            }
+        }
+
+        String ret = String.format("Detected pause in JVM or host machine (eg 
GC): pause of approximately %d ms, " +
+                "total pause: info level: %d, warn level: %d %n",
+                extraSleepTime, numGcInfoThresholdExceeded, 
numGcWarnThresholdExceeded);
+        if (gcDiffs.isEmpty()) {
+            ret += ("No GCs detected");
+        } else {
+            ret += String.join("\n", gcDiffs);
+        }
+        return ret;
+    }
+
+    private Map<String, GcTimes> getGcTimes() {
+        Map<String, GcTimes> map = new HashMap<>();
+        List<GarbageCollectorMXBean> gcBeans = 
ManagementFactory.getGarbageCollectorMXBeans();
+        for (GarbageCollectorMXBean gcBean : gcBeans) {
+            map.put(gcBean.getName(), new GcTimes(gcBean));
+        }
+        return map;
+    }
+
+    private static class GcTimes {
+
+        private long gcCount;
+        private long gcTimeMillis;
+
+        private GcTimes(GarbageCollectorMXBean gcBean) {
+            gcCount = gcBean.getCollectionCount();
+            gcTimeMillis = gcBean.getCollectionTime();
+        }
+
+        private GcTimes(long count, long time) {
+            this.gcCount = count;
+            this.gcTimeMillis = time;
+        }
+
+        private GcTimes subtract(GcTimes other) {
+            return new GcTimes(this.gcCount - other.gcCount,
+                    this.gcTimeMillis - other.gcTimeMillis);
+        }
+
+        public String toString() {
+            return "count=" + gcCount + " time=" + gcTimeMillis + "ms";
+        }
+
+    }
+
+    private class JVMMonitor implements Runnable {
+        @Override
+        public void run() {
+            Map<String, GcTimes> gcTimesBeforeSleep = getGcTimes();
+            LOG.info("Starting JVM Pause Monitor with infoThresholdMs:{} 
warnThresholdMs:{} and sleepTimeMs:{}",
+                    infoThresholdMs, warnThresholdMs, sleepTimeMs);
+            while (shouldRun) {
+                long startTime = Instant.now().toEpochMilli();
+                try {
+                    Thread.sleep(sleepTimeMs);
+                } catch (InterruptedException ie) {
+                    return;
+                }
+                long endTime = Instant.now().toEpochMilli();
+                long extraSleepTime = (endTime - startTime) - sleepTimeMs;
+                Map<String, GcTimes> gcTimesAfterSleep = getGcTimes();
+
+                if (extraSleepTime > warnThresholdMs) {
+                    ++numGcWarnThresholdExceeded;
+                    LOG.warn(formatMessage(extraSleepTime, gcTimesAfterSleep, 
gcTimesBeforeSleep));
+                } else if (extraSleepTime > infoThresholdMs) {
+                    ++numGcInfoThresholdExceeded;
+                    LOG.info(formatMessage(extraSleepTime, gcTimesAfterSleep, 
gcTimesBeforeSleep));
+                }
+                totalGcExtraSleepTime += extraSleepTime;
+                gcTimesBeforeSleep = gcTimesAfterSleep;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git 
a/zookeeper-server/src/test/java/org/apache/zookeeper/ServerConfigTest.java 
b/zookeeper-server/src/test/java/org/apache/zookeeper/ServerConfigTest.java
index 27faa74..e542670 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/ServerConfigTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/ServerConfigTest.java
@@ -19,12 +19,15 @@
 package org.apache.zookeeper;
 
 import org.apache.zookeeper.server.ServerConfig;
+import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
 import org.junit.Before;
 import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 import java.io.File;
 
@@ -60,6 +63,26 @@ public class ServerConfigTest {
         serverConfig.parse(args);
     }
 
+    @Test
+    public void testJvmPauseMonitorConfigured() {
+        final Long sleepTime = 444L;
+        final Long warnTH = 5555L;
+        final Long infoTH = 555L;
+
+        QuorumPeerConfig qpConfig = mock(QuorumPeerConfig.class);
+        when(qpConfig.isJvmPauseMonitorToRun()).thenReturn(true);
+        when(qpConfig.getJvmPauseSleepTimeMs()).thenReturn(sleepTime);
+        when(qpConfig.getJvmPauseWarnThresholdMs()).thenReturn(warnTH);
+        when(qpConfig.getJvmPauseInfoThresholdMs()).thenReturn(infoTH);
+
+        serverConfig.readFrom(qpConfig);
+
+        assertEquals(sleepTime, 
Long.valueOf(serverConfig.getJvmPauseSleepTimeMs()));
+        assertEquals(warnTH, 
Long.valueOf(serverConfig.getJvmPauseWarnThresholdMs()));
+        assertEquals(infoTH, 
Long.valueOf(serverConfig.getJvmPauseInfoThresholdMs()));
+        assertTrue(serverConfig.isJvmPauseMonitorToRun());
+    }
+
     boolean checkEquality(String a, String b) {
         assertNotNull(a);
         assertNotNull(b);
diff --git 
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/QuorumPeerConfigTest.java
 
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/QuorumPeerConfigTest.java
index 5dfd2d0..d56a618 100644
--- 
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/QuorumPeerConfigTest.java
+++ 
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/QuorumPeerConfigTest.java
@@ -20,6 +20,7 @@ package org.apache.zookeeper.server.quorum;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.File;
@@ -115,6 +116,28 @@ public class QuorumPeerConfigTest {
         quorumPeerConfig.parseProperties(zkProp);
     }
 
+    @Test
+    public void testJvmPauseMonitorConfigured()
+            throws IOException, ConfigException {
+        final Long sleepTime = 444L;
+        final Long warnTH = 5555L;
+        final Long infoTH = 555L;
+
+        QuorumPeerConfig quorumPeerConfig = new QuorumPeerConfig();
+        Properties zkProp = getDefaultZKProperties();
+        zkProp.setProperty("dataDir", new File("myDataDir").getAbsolutePath());
+        zkProp.setProperty("jvm.pause.monitor", "true");
+        zkProp.setProperty("jvm.pause.sleep.time.ms", sleepTime.toString());
+        zkProp.setProperty("jvm.pause.warn-threshold.ms", warnTH.toString());
+        zkProp.setProperty("jvm.pause.info-threshold.ms", infoTH.toString());
+        quorumPeerConfig.parseProperties(zkProp);
+
+        assertEquals(sleepTime, 
Long.valueOf(quorumPeerConfig.getJvmPauseSleepTimeMs()));
+        assertEquals(warnTH, 
Long.valueOf(quorumPeerConfig.getJvmPauseWarnThresholdMs()));
+        assertEquals(infoTH, 
Long.valueOf(quorumPeerConfig.getJvmPauseInfoThresholdMs()));
+        assertTrue(quorumPeerConfig.isJvmPauseMonitorToRun());
+    }
+
     private Properties getDefaultZKProperties() {
         Properties zkProp = new Properties();
         zkProp.setProperty("dataDir", new File("myDataDir").getAbsolutePath());
diff --git 
a/zookeeper-server/src/test/java/org/apache/zookeeper/server/util/JvmPauseMonitorTest.java
 
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/util/JvmPauseMonitorTest.java
new file mode 100644
index 0000000..4e701b8
--- /dev/null
+++ 
b/zookeeper-server/src/test/java/org/apache/zookeeper/server/util/JvmPauseMonitorTest.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.util;
+
+import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class JvmPauseMonitorTest {
+
+    private final Long sleepTime = 100L;
+    private final Long infoTH = -1L;
+    private final Long warnTH = -1L;
+    private JvmPauseMonitor pauseMonitor;
+
+    @Test(timeout=5000)
+    public void testJvmPauseMonitorExceedInfoThreshold() throws 
InterruptedException {
+        QuorumPeerConfig qpConfig = mock(QuorumPeerConfig.class);
+        when(qpConfig.getJvmPauseSleepTimeMs()).thenReturn(sleepTime);
+        when(qpConfig.getJvmPauseInfoThresholdMs()).thenReturn(infoTH);
+
+        pauseMonitor = new JvmPauseMonitor(qpConfig);
+        pauseMonitor.serviceStart();
+
+        Assert.assertEquals(sleepTime, Long.valueOf(pauseMonitor.sleepTimeMs));
+        Assert.assertEquals(infoTH, 
Long.valueOf(pauseMonitor.infoThresholdMs));
+
+        while(pauseMonitor.getNumGcInfoThresholdExceeded() == 0) {
+            Thread.sleep(200);
+        }
+    }
+
+    @Test(timeout=5000)
+    public void testJvmPauseMonitorExceedWarnThreshold() throws 
InterruptedException {
+        QuorumPeerConfig qpConfig = mock(QuorumPeerConfig.class);
+        when(qpConfig.getJvmPauseSleepTimeMs()).thenReturn(sleepTime);
+        when(qpConfig.getJvmPauseWarnThresholdMs()).thenReturn(warnTH);
+
+        pauseMonitor = new JvmPauseMonitor(qpConfig);
+        pauseMonitor.serviceStart();
+
+        Assert.assertEquals(sleepTime, Long.valueOf(pauseMonitor.sleepTimeMs));
+        Assert.assertEquals(warnTH, 
Long.valueOf(pauseMonitor.warnThresholdMs));
+
+        while(pauseMonitor.getNumGcWarnThresholdExceeded() == 0) {
+            Thread.sleep(200);
+        }
+
+    }
+
+    @After
+    public void teardown() {
+        pauseMonitor.serviceStop();
+    }
+}

Reply via email to