IGNITE-6171 Native facility to control excessive GC pauses

Signed-off-by: Anton Vinogradov <a...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/ignite/repo
Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/af8cb624
Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/af8cb624
Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/af8cb624

Branch: refs/heads/ignite-7485-2
Commit: af8cb624220398e99954f892ffd12e072524066b
Parents: 2b7623a
Author: Dmitriy Sorokin <cyberde...@bk.ru>
Authored: Wed Jan 31 18:11:07 2018 +0300
Committer: Anton Vinogradov <a...@apache.org>
Committed: Wed Jan 31 18:11:07 2018 +0300

----------------------------------------------------------------------
 .../apache/ignite/IgniteSystemProperties.java   |  12 ++
 .../apache/ignite/internal/IgniteKernal.java    |  21 ++-
 .../ignite/internal/LongJVMPauseDetector.java   | 167 +++++++++++++++++++
 .../org/apache/ignite/mxbean/IgniteMXBean.java  |  25 +++
 4 files changed, 224 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ignite/blob/af8cb624/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java 
b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
index 7761292..2b221a1 100644
--- a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
+++ b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
@@ -770,6 +770,18 @@ public final class IgniteSystemProperties {
      */
     public static final String IGNITE_DATA_STORAGE_FOLDER_BY_CONSISTENT_ID = 
"IGNITE_DATA_STORAGE_FOLDER_BY_CONSISTENT_ID";
 
+    /** When set to {@code true}, the JVM pause detector is not started. Default is {@code false}. */
+    public static final String IGNITE_JVM_PAUSE_DETECTOR_DISABLED = 
"IGNITE_JVM_PAUSE_DETECTOR_DISABLED";
+
+    /** JVM pause detector precision: wake-up interval of the detector worker in milliseconds. Default is 50. */
+    public static final String IGNITE_JVM_PAUSE_DETECTOR_PRECISION = 
"IGNITE_JVM_PAUSE_DETECTOR_PRECISION";
+
+    /** JVM pause detector threshold: minimal delay in milliseconds registered as a long JVM pause. Default is 500. */
+    public static final String IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD = 
"IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD";
+
+    /** Number of the most recent long JVM pause events kept by the JVM pause detector. Default is 20. */
+    public static final String IGNITE_JVM_PAUSE_DETECTOR_LAST_EVENTS_COUNT = 
"IGNITE_JVM_PAUSE_DETECTOR_LAST_EVENTS_COUNT";
+
     /**
      * Default value is {@code false}.
      */

http://git-wip-us.apache.org/repos/asf/ignite/blob/af8cb624/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java 
b/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java
index 3094963..e637a6b 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java
@@ -130,8 +130,8 @@ import 
org.apache.ignite.internal.processors.cache.persistence.filename.PdsConsi
 import 
org.apache.ignite.internal.processors.cacheobject.IgniteCacheObjectProcessor;
 import org.apache.ignite.internal.processors.closure.GridClosureProcessor;
 import org.apache.ignite.internal.processors.cluster.ClusterProcessor;
-import 
org.apache.ignite.internal.processors.cluster.IGridClusterStateProcessor;
 import org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor;
+import 
org.apache.ignite.internal.processors.cluster.IGridClusterStateProcessor;
 import 
org.apache.ignite.internal.processors.continuous.GridContinuousProcessor;
 import org.apache.ignite.internal.processors.datastreamer.DataStreamProcessor;
 import 
org.apache.ignite.internal.processors.datastructures.DataStructuresProcessor;
@@ -288,6 +288,10 @@ public class IgniteKernal implements IgniteEx, 
IgniteMXBean, Externalizable {
     /** Force complete reconnect future. */
     private static final Object STOP_RECONNECT = new Object();
 
+    static {
+        LongJVMPauseDetector.start();
+    }
+
     /** */
     @GridToStringExclude
     private GridKernalContextImpl ctx;
@@ -460,6 +464,21 @@ public class IgniteKernal implements IgniteEx, 
IgniteMXBean, Externalizable {
     }
 
     /** {@inheritDoc} */
+    @Override public long getLongJVMPausesCount() {
+        return LongJVMPauseDetector.longPausesCount();
+    }
+
+    /** {@inheritDoc} */
+    @Override public long getLongJVMPausesTotalDuration() {
+        return LongJVMPauseDetector.longPausesTotalDuration();
+    }
+
+    /** {@inheritDoc} */
+    @Override public Map<Long, Long> getLongJVMPauseLastEvents() {
+        return LongJVMPauseDetector.longPauseEvents();
+    }
+
+    /** {@inheritDoc} */
     @Override public String getUpTimeFormatted() {
         return X.timeSpan2HMSM(U.currentTimeMillis() - startTime);
     }

http://git-wip-us.apache.org/repos/asf/ignite/blob/af8cb624/modules/core/src/main/java/org/apache/ignite/internal/LongJVMPauseDetector.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/LongJVMPauseDetector.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/LongJVMPauseDetector.java
new file mode 100644
index 0000000..c10b6f9
--- /dev/null
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/LongJVMPauseDetector.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal;
+
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicReference;
+import org.apache.ignite.IgniteLogger;
+import org.apache.ignite.logger.java.JavaLogger;
+
+import static 
org.apache.ignite.IgniteSystemProperties.IGNITE_JVM_PAUSE_DETECTOR_DISABLED;
+import static 
org.apache.ignite.IgniteSystemProperties.IGNITE_JVM_PAUSE_DETECTOR_LAST_EVENTS_COUNT;
+import static 
org.apache.ignite.IgniteSystemProperties.IGNITE_JVM_PAUSE_DETECTOR_PRECISION;
+import static 
org.apache.ignite.IgniteSystemProperties.IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD;
+import static org.apache.ignite.IgniteSystemProperties.getBoolean;
+import static org.apache.ignite.IgniteSystemProperties.getInteger;
+
+/**
+ * Class for detection of long JVM pauses.
+ * It has a worker thread, which wakes up in cycle every {@code PRECISION} 
(default is 50) milliseconds,
+ * and monitors the time elapsed between awakenings. If the worker pause exceeds the 
expected value by at least {@code THRESHOLD}
+ * (default is 500) milliseconds, the difference is considered a JVM pause, most likely 
STW, and a long JVM pause event is registered.
+ * The values of {@code PRECISION}, {@code THRESHOLD} and {@code EVT_CNT} 
(event window size, default is 20) can be
+ * configured via the system or environment properties 
IGNITE_JVM_PAUSE_DETECTOR_PRECISION,
+ * IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD and 
IGNITE_JVM_PAUSE_DETECTOR_LAST_EVENTS_COUNT respectively.
+ */
+class LongJVMPauseDetector {
+    /** Logger. */
+    private static final IgniteLogger LOG = new JavaLogger();
+
+    /** Worker reference. */
+    private static final AtomicReference<Thread> workerRef = new 
AtomicReference<>();
+
+    /** Precision. */
+    private static final int PRECISION = 
getInteger(IGNITE_JVM_PAUSE_DETECTOR_PRECISION, 50);
+
+    /** Threshold. */
+    private static final int THRESHOLD = 
getInteger(IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD, 500);
+
+    /** Event count. */
+    private static final int EVT_CNT = 
getInteger(IGNITE_JVM_PAUSE_DETECTOR_LAST_EVENTS_COUNT, 20);
+
+    /** Long pause count. */
+    private static long longPausesCnt;
+
+    /** Long pause total duration. */
+    private static long longPausesTotalDuration;
+
+    /** Long pauses timestamps. */
+    private static final long[] longPausesTimestamps = new long[EVT_CNT];
+
+    /** Long pauses durations. */
+    private static final long[] longPausesDurations = new long[EVT_CNT];
+
+    /**
+     * Starts worker if not started yet.
+     */
+    public static void start() {
+        if (getBoolean(IGNITE_JVM_PAUSE_DETECTOR_DISABLED, false)) {
+            if (LOG.isDebugEnabled())
+                LOG.debug("JVM Pause Detector is disabled.");
+
+            return;
+        }
+
+        final Thread worker = new Thread("jvm-pause-detector-worker") {
+            private long prev = System.currentTimeMillis();
+
+            @Override public void run() {
+                if (LOG.isDebugEnabled())
+                    LOG.debug(getName() + " has been started.");
+
+                while (true) {
+                    try {
+                        Thread.sleep(PRECISION);
+
+                        final long now = System.currentTimeMillis();
+                        final long pause = now - PRECISION - prev;
+
+                        prev = now;
+
+                        if (pause >= THRESHOLD) {
+                            LOG.warning("Possible too long JVM pause: " + 
pause + " milliseconds.");
+
+                            synchronized (LongJVMPauseDetector.class) {
+                                final int next = (int)(longPausesCnt % 
EVT_CNT);
+
+                                longPausesCnt++;
+
+                                longPausesTotalDuration += pause;
+
+                                longPausesTimestamps[next] = now;
+
+                                longPausesDurations[next] = pause;
+                            }
+                        }
+                    }
+                    catch (InterruptedException e) {
+                        LOG.error(getName() + " has been interrupted", e);
+
+                        break;
+                    }
+                }
+            }
+        };
+
+        if (!workerRef.compareAndSet(null, worker)) {
+            LOG.warning(LongJVMPauseDetector.class.getSimpleName() + " already 
started!");
+
+            return;
+        }
+
+        worker.setDaemon(true);
+        worker.start();
+    }
+
+    /**
+     * Stops the worker if one is created and running.
+     */
+    public static void stop() {
+        final Thread worker = workerRef.getAndSet(null);
+
+        if (worker != null && worker.isAlive() && !worker.isInterrupted())
+            worker.interrupt();
+    }
+
+    /**
+     * @return Long JVM pauses count.
+     */
+    synchronized static long longPausesCount() {
+        return longPausesCnt;
+    }
+
+    /**
+     * @return Long JVM pauses total duration.
+     */
+    synchronized static long longPausesTotalDuration() {
+        return longPausesTotalDuration;
+    }
+
+    /**
+     * @return Last long JVM pause events.
+     */
+    synchronized static Map<Long, Long> longPauseEvents() {
+        final Map<Long, Long> evts = new TreeMap<>();
+
+        for (int i = 0; i < longPausesTimestamps.length && 
longPausesTimestamps[i] != 0; i++)
+            evts.put(longPausesTimestamps[i], longPausesDurations[i]);
+
+        return evts;
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/af8cb624/modules/core/src/main/java/org/apache/ignite/mxbean/IgniteMXBean.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/mxbean/IgniteMXBean.java 
b/modules/core/src/main/java/org/apache/ignite/mxbean/IgniteMXBean.java
index 428d03c..308c148 100644
--- a/modules/core/src/main/java/org/apache/ignite/mxbean/IgniteMXBean.java
+++ b/modules/core/src/main/java/org/apache/ignite/mxbean/IgniteMXBean.java
@@ -18,6 +18,7 @@
 package org.apache.ignite.mxbean;
 
 import java.util.List;
+import java.util.Map;
 import java.util.UUID;
 import javax.management.JMException;
 
@@ -96,6 +97,30 @@ public interface IgniteMXBean {
     public long getUpTime();
 
     /**
+     * Gets long JVM pauses count.
+     *
+     * @return Long JVM pauses count.
+     */
+    @MXBeanDescription("Long JVM pauses count.")
+    public long getLongJVMPausesCount();
+
+    /**
+     * Gets the total duration of long JVM pauses, in milliseconds.
+     *
+     * @return Total duration of long JVM pauses, in milliseconds.
+     */
+    @MXBeanDescription("Long JVM pauses total duration.")
+    public long getLongJVMPausesTotalDuration();
+
+    /**
+     * Gets the most recent long JVM pause events.
+     *
+     * @return Map of event timestamp to pause duration for the last long JVM pauses.
+     */
+    @MXBeanDescription("Long JVM pause last events.")
+    public Map<Long, Long> getLongJVMPauseLastEvents();
+
+    /**
      * Gets a list of formatted user-defined attributes added to this node.
      * <p>
      * Note that grid will add all System properties and environment properties

Reply via email to