Rely on the JVM to handle OutOfMemoryErrors

patch by Benjamin Lerer; reviewed by Joshua McKenzie for CASSANDRA-13006


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/02aba734
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/02aba734
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/02aba734

Branch: refs/heads/cassandra-3.0
Commit: 02aba7343ce300397ab672bbb1788aa8182d8a48
Parents: 3cd2c3c
Author: Benjamin Lerer <b.le...@gmail.com>
Authored: Tue Dec 12 10:21:05 2017 +0100
Committer: Benjamin Lerer <b.le...@gmail.com>
Committed: Tue Dec 12 10:21:05 2017 +0100

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 NEWS.txt                                        |  9 ++-
 bin/cassandra                                   | 19 ++++-
 conf/cassandra-env.ps1                          | 10 +++
 conf/cassandra-env.sh                           | 12 +++
 .../apache/cassandra/service/StartupChecks.java | 74 ++++++++++++++++++
 .../org/apache/cassandra/utils/HeapUtils.java   | 82 ++++----------------
 .../cassandra/utils/JVMStabilityInspector.java  | 24 +++++-
 .../utils/JVMStabilityInspectorTest.java        | 31 +++++---
 9 files changed, 178 insertions(+), 84 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/02aba734/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index c1e81fd..5200eb1 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.2.12
+ * Rely on the JVM to handle OutOfMemoryErrors (CASSANDRA-13006)
  * Grab refs during scrub/index redistribution/cleanup (CASSANDRA-13873)
 
 2.2.11

http://git-wip-us.apache.org/repos/asf/cassandra/blob/02aba734/NEWS.txt
----------------------------------------------------------------------
diff --git a/NEWS.txt b/NEWS.txt
index 3bff458..5747941 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -18,8 +18,13 @@ using the provided 'sstableupgrade' tool.
 
 Upgrading
 ---------
-    - Nothing specific to this release, but please see 2.2 if you are upgrading
-      from a previous version.
+    - Cassandra is now relying on the JVM options to properly shutdown on 
OutOfMemoryError. By default it will
+      rely on the OnOutOfMemoryError option as the ExitOnOutOfMemoryError and 
CrashOnOutOfMemoryError options
+      are not supported by the older 1.7 and 1.8 JVMs. A warning will be 
logged at startup if none of those JVM
+      options are used. See CASSANDRA-13006 for more details.
+    - Cassandra is not logging anymore by default an Heap histogram on 
OutOfMemoryError. To enable that behavior
+      set the 'cassandra.printHeapHistogramOnOutOfMemoryError' System property 
to 'true'. See CASSANDRA-13006
+      for more details.
 
 2.2.11
 ======

http://git-wip-us.apache.org/repos/asf/cassandra/blob/02aba734/bin/cassandra
----------------------------------------------------------------------
diff --git a/bin/cassandra b/bin/cassandra
index 2dd0fe1..0e337e8 100755
--- a/bin/cassandra
+++ b/bin/cassandra
@@ -28,6 +28,7 @@
 #
 #   CLASSPATH -- A Java classpath containing everything necessary to run.
 #   JVM_OPTS -- Additional arguments to the JVM for heap size, etc
+#   JVM_ON_OUT_OF_MEMORY_ERROR_OPT -- The OnOutOfMemoryError JVM option if 
specified
 #   CASSANDRA_CONF -- Directory containing Cassandra configuration files.
 #
 # As a convenience, a fragment of shell is sourced in order to set one or
@@ -199,12 +200,22 @@ launch_service()
     # to close stdout/stderr, but it's up to us not to background.
     if [ "x$foreground" != "x" ]; then
         cassandra_parms="$cassandra_parms -Dcassandra-foreground=yes"
-        exec $NUMACTL "$JAVA" $JVM_OPTS $cassandra_parms -cp "$CLASSPATH" 
$props "$class"
+        if [ "x$JVM_ON_OUT_OF_MEMORY_ERROR_OPT" != "x" ]; then
+            exec $NUMACTL "$JAVA" $JVM_OPTS "$JVM_ON_OUT_OF_MEMORY_ERROR_OPT" 
$cassandra_parms -cp "$CLASSPATH" $props "$class"
+        else
+            exec $NUMACTL "$JAVA" $JVM_OPTS $cassandra_parms -cp "$CLASSPATH" 
$props "$class"
+        fi
     # Startup CassandraDaemon, background it, and write the pid.
     else
-        exec $NUMACTL "$JAVA" $JVM_OPTS $cassandra_parms -cp "$CLASSPATH" 
$props "$class" <&- &
-        [ ! -z "$pidpath" ] && printf "%d" $! > "$pidpath"
-        true
+        if [ "x$JVM_ON_OUT_OF_MEMORY_ERROR_OPT" != "x" ]; then
+            exec $NUMACTL "$JAVA" $JVM_OPTS "$JVM_ON_OUT_OF_MEMORY_ERROR_OPT" 
$cassandra_parms -cp "$CLASSPATH" $props "$class" <&- &
+            [ ! -z "$pidpath" ] && printf "%d" $! > "$pidpath"
+            true
+        else
+            exec $NUMACTL "$JAVA" $JVM_OPTS $cassandra_parms -cp "$CLASSPATH" 
$props "$class" <&- &
+            [ ! -z "$pidpath" ] && printf "%d" $! > "$pidpath"
+            true
+        fi
     fi
 
     return $?

http://git-wip-us.apache.org/repos/asf/cassandra/blob/02aba734/conf/cassandra-env.ps1
----------------------------------------------------------------------
diff --git a/conf/cassandra-env.ps1 b/conf/cassandra-env.ps1
index 321a9ca..7b4a632 100644
--- a/conf/cassandra-env.ps1
+++ b/conf/cassandra-env.ps1
@@ -390,6 +390,16 @@ Function SetCassandraEnvironment
     $env:JVM_OPTS="$env:JVM_OPTS -Xmn$env:HEAP_NEWSIZE"
     $env:JVM_OPTS="$env:JVM_OPTS -XX:+HeapDumpOnOutOfMemoryError"
 
+    # stop the jvm on OutOfMemoryError as it can result in some data corruption
+    # uncomment the preferred option
+    # ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater 
or equals to 1.7 update 101 or 1.8 update 92
+    # $env:JVM_OPTS="$env:JVM_OPTS -XX:+ExitOnOutOfMemoryError"
+    # $env:JVM_OPTS="$env:JVM_OPTS -XX:+CrashOnOutOfMemoryError"
+    $env:JVM_OPTS="$env:JVM_OPTS -XX:OnOutOfMemoryError=""taskkill /F /PID 
%p"""
+
+    # print an heap histogram on OutOfMemoryError
+    # $env:JVM_OPTS="$env:JVM_OPTS 
-Dcassandra.printHeapHistogramOnOutOfMemoryError=true"
+
     # Per-thread stack size.
     $env:JVM_OPTS="$env:JVM_OPTS -Xss256k"
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/02aba734/conf/cassandra-env.sh
----------------------------------------------------------------------
diff --git a/conf/cassandra-env.sh b/conf/cassandra-env.sh
index b519b76..7b1b8d3 100644
--- a/conf/cassandra-env.sh
+++ b/conf/cassandra-env.sh
@@ -204,6 +204,18 @@ fi
 
 startswith() { [ "${1#$2}" != "$1" ]; }
 
+# stop the jvm on OutOfMemoryError as it can result in some data corruption
+# uncomment the preferred option
+# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash 
commands split words
+# on white spaces without taking quotes into account
+# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or 
equals to 1.7 update 101 or 1.8 update 92
+# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError"
+# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError"
+JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p"
+
+# print an heap histogram on OutOfMemoryError
+# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true"
+
 # Per-thread stack size.
 JVM_OPTS="$JVM_OPTS -Xss256k"
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/02aba734/src/java/org/apache/cassandra/service/StartupChecks.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/StartupChecks.java 
b/src/java/org/apache/cassandra/service/StartupChecks.java
index 34bc824..7ec16d1 100644
--- a/src/java/org/apache/cassandra/service/StartupChecks.java
+++ b/src/java/org/apache/cassandra/service/StartupChecks.java
@@ -19,6 +19,8 @@ package org.apache.cassandra.service;
 
 import java.io.File;
 import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.RuntimeMXBean;
 import java.nio.file.*;
 import java.nio.file.attribute.BasicFileAttributes;
 import java.util.*;
@@ -178,6 +180,78 @@ public class StartupChecks
             {
                 logger.warn("Non-Oracle JVM detected.  Some features, such as 
immediate unmap of compacted SSTables, may not work as intended");
             }
+            else
+            {
+                    checkOutOfMemoryHandling();
+            }
+        }
+
+        /**
+         * Checks that the JVM is configured to handle OutOfMemoryError
+         */
+        private void checkOutOfMemoryHandling()
+        {
+            int version = getJavaVersion();
+            int update = getUpdate();
+            // The ExitOnOutOfMemory and CrashOnOutOfMemory are supported 
since the version 7u101 and 8u92
+            boolean jreSupportExitOnOutOfMemory = version > 8
+                                                    || (version == 7 && update 
>= 101)
+                                                    || (version == 8 && update 
>= 92);
+            if (jreSupportExitOnOutOfMemory)
+            {
+                if (!jvmOptionsContainsOneOf("-XX:OnOutOfMemoryError=", 
"-XX:+ExitOnOutOfMemoryError", "-XX:+CrashOnOutOfMemoryError"))
+                    logger.warn("The JVM is not configured to stop on 
OutOfMemoryError which can cause data corruption."
+                                + " Use one of the following JVM options to 
configure the behavior on OutOfMemoryError: "
+                                + " -XX:+ExitOnOutOfMemoryError, 
-XX:+CrashOnOutOfMemoryError, or -XX:OnOutOfMemoryError=\"<cmd args>;<cmd 
args>\"");
+            }
+            else
+            {
+                if (!jvmOptionsContainsOneOf("-XX:OnOutOfMemoryError="))
+                    logger.warn("The JVM is not configured to stop on 
OutOfMemoryError which can cause data corruption."
+                            + " Either upgrade your JRE to a version greater 
or equal to 8u92 and use 
-XX:+ExitOnOutOfMemoryError/-XX:+CrashOnOutOfMemoryError"
+                            + " or use -XX:OnOutOfMemoryError=\"<cmd 
args>;<cmd args>\" on your current JRE.");
+            }
+        }
+
+        /**
+         * Returns the java version number for an Oracle JVM.
+         * @return the java version number
+         */
+        private int getJavaVersion()
+        {
+            String jreVersion = System.getProperty("java.version");
+            String version = jreVersion.startsWith("1.") ? 
jreVersion.substring(2, 3) // Pre 9 version
+                                                         : 
jreVersion.substring(0, jreVersion.indexOf('.'));
+            return Integer.parseInt(version);
+        }
+
+        /**
+         * Return the update number for an Oracle JVM.
+         * @return the update number
+         */
+        private int getUpdate()
+        {
+            String jreVersion = System.getProperty("java.version");
+            int updateSeparatorIndex = jreVersion.indexOf('_');
+            return Integer.parseInt(jreVersion.substring(updateSeparatorIndex 
+ 1));
+        }
+
+        /**
+         * Checks if one of the specified options is being used.
+         * @param optionNames The name of the options to check
+         * @return {@code true} if one of the specified options is being used, 
{@code false} otherwise.
+         */
+        private boolean jvmOptionsContainsOneOf(String... optionNames)
+        {
+            RuntimeMXBean runtimeMxBean = ManagementFactory.getRuntimeMXBean();
+            List<String> inputArguments = runtimeMxBean.getInputArguments();
+            for (String argument : inputArguments)
+            {
+                for (String optionName : optionNames)
+                    if (argument.startsWith(optionName))
+                        return true;
+            }
+            return false;
         }
     };
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/02aba734/src/java/org/apache/cassandra/utils/HeapUtils.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/HeapUtils.java 
b/src/java/org/apache/cassandra/utils/HeapUtils.java
index bfc8a0b..2d068de 100644
--- a/src/java/org/apache/cassandra/utils/HeapUtils.java
+++ b/src/java/org/apache/cassandra/utils/HeapUtils.java
@@ -19,11 +19,6 @@ package org.apache.cassandra.utils;
 
 import java.io.*;
 import java.lang.management.ManagementFactory;
-import java.lang.management.RuntimeMXBean;
-import java.nio.file.FileSystems;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.List;
 
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.text.StrBuilder;
@@ -32,7 +27,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Utility to generate heap dumps.
+ * Utility to log heap histogram.
  *
  */
 public final class HeapUtils
@@ -43,54 +38,33 @@ public final class HeapUtils
      * Generates a HEAP dump in the directory specified by the 
<code>HeapDumpPath</code> JVM option
      * or in the <code>CASSANDRA_HOME</code> directory.
      */
-    public static void generateHeapDump()
+    public static void logHeapHistogram()
     {
-        Long processId = getProcessId();
-        if (processId == null)
+        try
         {
-            logger.error("The process ID could not be retrieved. Skipping heap 
dump generation.");
-            return;
-        }
+            logger.info("Trying to log the heap histogram using jmap");
 
-        String heapDumpPath = getHeapDumpPathOption();
-        if (heapDumpPath == null)
-        {
-            String cassandraHome = System.getenv("CASSANDRA_HOME");
-            if (cassandraHome == null)
+            Long processId = getProcessId();
+            if (processId == null)
             {
+                logger.error("The process ID could not be retrieved. Skipping 
heap histogram generation.");
                 return;
             }
 
-            heapDumpPath = cassandraHome;
-        }
+            String jmapPath = getJmapPath();
 
-        Path dumpPath = FileSystems.getDefault().getPath(heapDumpPath);
-        if (Files.isDirectory(dumpPath))
-        {
-            dumpPath = dumpPath.resolve("java_pid" + processId + ".hprof");
-        }
+            // The jmap file could not be found. In this case let's default to 
jmap in the hope that it is in the path.
+            String jmapCommand = jmapPath == null ? "jmap" : jmapPath;
 
-        String jmapPath = getJmapPath();
+            String[] histoCommands = new String[] {jmapCommand,
+                    "-histo",
+                    processId.toString()};
 
-        // The jmap file could not be found. In this case let's default to 
jmap in the hope that it is in the path.
-        String jmapCommand = jmapPath == null ? "jmap" : jmapPath;
-
-        String[] dumpCommands = new String[] {jmapCommand,
-                                              "-dump:format=b,file=" + 
dumpPath,
-                                              processId.toString()};
-
-        // Lets also log the Heap histogram
-        String[] histoCommands = new String[] {jmapCommand,
-                                               "-histo",
-                                               processId.toString()};
-        try
-        {
-            logProcessOutput(Runtime.getRuntime().exec(dumpCommands));
             logProcessOutput(Runtime.getRuntime().exec(histoCommands));
         }
-        catch (IOException e)
+        catch (Throwable e)
         {
-            logger.error("The heap dump could not be generated due to the 
following error: ", e);
+            logger.error("The heap histogram could not be generated due to the 
following error: ", e);
         }
     }
 
@@ -137,32 +111,6 @@ public final class HeapUtils
     }
 
     /**
-     * Retrieves the value of the <code>HeapDumpPath</code> JVM option.
-     * @return the value of the <code>HeapDumpPath</code> JVM option or 
<code>null</code> if the value has not been
-     * specified.
-     */
-    private static String getHeapDumpPathOption()
-    {
-        RuntimeMXBean runtimeMxBean = ManagementFactory.getRuntimeMXBean();
-        List<String> inputArguments = runtimeMxBean.getInputArguments();
-        String heapDumpPathOption = null;
-        for (String argument : inputArguments)
-        {
-            if (argument.startsWith("-XX:HeapDumpPath="))
-            {
-                heapDumpPathOption = argument;
-                // We do not break in case the option has been specified 
several times.
-                // In general it seems that JVMs use the right-most argument 
as the winner.
-            }
-        }
-
-        if (heapDumpPathOption == null)
-            return null;
-
-        return heapDumpPathOption.substring(17, heapDumpPathOption.length());
-    }
-
-    /**
      * Retrieves the process ID or <code>null</code> if the process ID cannot 
be retrieved.
      * @return the process ID or <code>null</code> if the process ID cannot be 
retrieved.
      */

http://git-wip-us.apache.org/repos/asf/cassandra/blob/02aba734/src/java/org/apache/cassandra/utils/JVMStabilityInspector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/JVMStabilityInspector.java 
b/src/java/org/apache/cassandra/utils/JVMStabilityInspector.java
index f8cb775..0196b04 100644
--- a/src/java/org/apache/cassandra/utils/JVMStabilityInspector.java
+++ b/src/java/org/apache/cassandra/utils/JVMStabilityInspector.java
@@ -21,6 +21,7 @@ import java.io.FileNotFoundException;
 import java.net.SocketException;
 
 import com.google.common.annotations.VisibleForTesting;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -38,6 +39,8 @@ public final class JVMStabilityInspector
     private static final Logger logger = 
LoggerFactory.getLogger(JVMStabilityInspector.class);
     private static Killer killer = new Killer();
 
+    private static Object lock = new Object();
+    private static boolean printingHeapHistogram;
 
     private JVMStabilityInspector() {}
 
@@ -52,8 +55,25 @@ public final class JVMStabilityInspector
         boolean isUnstable = false;
         if (t instanceof OutOfMemoryError)
         {
-            isUnstable = true;
-            HeapUtils.generateHeapDump();
+            if 
(Boolean.getBoolean("cassandra.printHeapHistogramOnOutOfMemoryError"))
+            {
+                // We want to avoid printing multiple time the heap histogram 
if multiple OOM errors happen in a short
+                // time span.
+                synchronized(lock)
+                {
+                    if (printingHeapHistogram)
+                        return;
+                    printingHeapHistogram = true;
+                }
+                HeapUtils.logHeapHistogram();
+            }
+
+            logger.error("OutOfMemory error letting the JVM handle the 
error:", t);
+
+            StorageService.instance.removeShutdownHook();
+            // We let the JVM handle the error. The startup checks should have 
warned the user if it did not configure
+            // the JVM behavior in case of OOM (CASSANDRA-13006).
+            throw (OutOfMemoryError) t;
         }
 
         if (DatabaseDescriptor.getDiskFailurePolicy() == 
Config.DiskFailurePolicy.die)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/02aba734/test/unit/org/apache/cassandra/utils/JVMStabilityInspectorTest.java
----------------------------------------------------------------------
diff --git 
a/test/unit/org/apache/cassandra/utils/JVMStabilityInspectorTest.java 
b/test/unit/org/apache/cassandra/utils/JVMStabilityInspectorTest.java
index 7142f97..f96ac6e 100644
--- a/test/unit/org/apache/cassandra/utils/JVMStabilityInspectorTest.java
+++ b/test/unit/org/apache/cassandra/utils/JVMStabilityInspectorTest.java
@@ -20,14 +20,19 @@ package org.apache.cassandra.utils;
 import org.apache.cassandra.config.Config;
 import org.apache.cassandra.config.DatabaseDescriptor;
 import org.apache.cassandra.io.FSReadError;
+
+import static java.util.Arrays.asList;
+
 import org.junit.Test;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.SocketException;
+import java.util.Arrays;
 
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 public class JVMStabilityInspectorTest
 {
@@ -45,10 +50,6 @@ public class JVMStabilityInspectorTest
             JVMStabilityInspector.inspectThrowable(new IOException());
             assertFalse(killerForTests.wasKilled());
 
-            killerForTests.reset();
-            JVMStabilityInspector.inspectThrowable(new OutOfMemoryError());
-            assertTrue(killerForTests.wasKilled());
-
             
DatabaseDescriptor.setDiskFailurePolicy(Config.DiskFailurePolicy.die);
             killerForTests.reset();
             JVMStabilityInspector.inspectThrowable(new FSReadError(new 
IOException(), "blah"));
@@ -62,11 +63,6 @@ public class JVMStabilityInspectorTest
             killerForTests.reset();
             JVMStabilityInspector.inspectThrowable(new Exception(new 
IOException()));
             assertFalse(killerForTests.wasKilled());
-
-            killerForTests.reset();
-            JVMStabilityInspector.inspectThrowable(new Exception(new 
OutOfMemoryError()));
-            assertTrue(killerForTests.wasKilled());
-
         }
         finally
         {
@@ -77,6 +73,23 @@ public class JVMStabilityInspectorTest
     }
 
     @Test
+    public void testOutOfMemoryHandling()
+    {
+        for (Throwable oom : asList(new OutOfMemoryError(), new Exception(new 
OutOfMemoryError())))
+        {
+            try
+            {
+                JVMStabilityInspector.inspectThrowable(oom);
+                fail("The JVMStabilityInspector should delegate the handling 
of OutOfMemoryErrors to the JVM");
+            }
+            catch (OutOfMemoryError e)
+            {
+                assertTrue(true);
+            }
+        }
+    }
+
+    @Test
     public void fileHandleTest()
     {
         KillerForTests killerForTests = new KillerForTests();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

Reply via email to