This is an automated email from the ASF dual-hosted git repository. timothyfarkas pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit 54c88a3f56cce20ceb900a3a83bf19c04d93ea1e Author: Timothy Farkas <[email protected]> AuthorDate: Tue Jun 5 13:50:35 2018 -0700 DRILL-6468: CatastrophicFailures should not do a graceful shutdown of drill when terminating the JVM. closes #1306 --- .../apache/drill/common/CatastrophicFailure.java | 2 +- .../org/apache/drill/exec/server/Drillbit.java | 6 ++- .../org/apache/drill/exec/server/FailureUtils.java | 60 ++++++++++++++++++++++ .../apache/drill/exec/work/foreman/Foreman.java | 20 ++++---- .../drill/exec/work/fragment/FragmentExecutor.java | 10 ++-- .../apache/drill/exec/server/TestFailureUtils.java | 46 +++++++++++++++++ .../drill/exec/exception/OutOfMemoryException.java | 4 +- 7 files changed, 131 insertions(+), 17 deletions(-) diff --git a/common/src/main/java/org/apache/drill/common/CatastrophicFailure.java b/common/src/main/java/org/apache/drill/common/CatastrophicFailure.java index 18e5747..6a45daa 100644 --- a/common/src/main/java/org/apache/drill/common/CatastrophicFailure.java +++ b/common/src/main/java/org/apache/drill/common/CatastrophicFailure.java @@ -30,7 +30,7 @@ public class CatastrophicFailure { * Exit the VM as we hit a catastrophic failure. * @param e * The Throwable that occurred - * @param name + * @param message * A descriptive message * @param code * An error code to exit the JVM with. diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java index 7b1d617..de11ddb 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java @@ -360,6 +360,11 @@ public class Drillbit implements AutoCloseable { @Override public void run() { + if (FailureUtils.hadUnrecoverableFailure()) { + // We cannot close drill cleanly in this case. + return; + } + logger.info("Received shutdown request."); try { /* @@ -453,5 +458,4 @@ public class Drillbit implements AutoCloseable { // return as-is return s; } - } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/FailureUtils.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/FailureUtils.java new file mode 100644 index 0000000..4d2fa4f --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/FailureUtils.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.server; + +import io.netty.util.internal.OutOfDirectMemoryError; +import org.apache.drill.common.CatastrophicFailure; +import org.apache.drill.exec.exception.OutOfMemoryException; + +public final class FailureUtils { + private static volatile boolean unrecoverableFailure; + + public static final int EXIT_CODE_HEAP_OOM = -1; + public static final int EXIT_CODE_JAVA_ERROR = -2; + + /** + * This message is used to distinguish between direct memory and heap memory {@link OutOfMemoryError}s. + */ + public static final String DIRECT_MEMORY_OOM_MESSAGE = "Direct buffer memory"; + + private FailureUtils() { + // Don't instantiate + } + + public static boolean isDirectMemoryOOM(Throwable e) { + if (e instanceof OutOfDirectMemoryError || e instanceof OutOfMemoryException) { + // These are always direct memory errors + return true; + } + + return (e instanceof OutOfMemoryError) && DIRECT_MEMORY_OOM_MESSAGE.equals(e.getMessage()); + } + + public static boolean isHeapOOM(Throwable e) { + return (e instanceof OutOfMemoryError) && !DIRECT_MEMORY_OOM_MESSAGE.equals(e.getMessage()); + } + + public static void unrecoverableFailure(Throwable e, String message, int exitCode) { + unrecoverableFailure = true; + CatastrophicFailure.exit(e, message, exitCode); + } + + public static boolean hadUnrecoverableFailure() { + return unrecoverableFailure; + } +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/Foreman.java b/exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/Foreman.java index 5633022..16e8eb0 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/Foreman.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/Foreman.java @@ -23,7 +23,6 @@ import com.google.protobuf.InvalidProtocolBufferException; import io.netty.channel.ChannelFuture; import io.netty.util.concurrent.Future; import io.netty.util.concurrent.GenericFutureListener; -import org.apache.drill.common.CatastrophicFailure; import org.apache.drill.common.exceptions.ExecutionSetupException; import org.apache.drill.common.exceptions.UserException; import org.apache.drill.common.logical.LogicalPlan; @@ -55,6 +54,7 @@ import org.apache.drill.exec.rpc.BaseRpcOutcomeListener; import org.apache.drill.exec.rpc.RpcException; import org.apache.drill.exec.rpc.UserClientConnection; import org.apache.drill.exec.server.DrillbitContext; +import org.apache.drill.exec.server.FailureUtils; import org.apache.drill.exec.server.options.OptionManager; import org.apache.drill.exec.testing.ControlsInjector; import org.apache.drill.exec.testing.ControlsInjectorFactory; @@ -70,6 +70,8 @@ import java.io.IOException; import java.util.Date; import java.util.List; +import static org.apache.drill.exec.server.FailureUtils.EXIT_CODE_HEAP_OOM; + /** * Foreman manages all the fragments (local and remote) for a single query where this * is the driving/root node. @@ -273,25 +275,23 @@ public class Foreman implements Runnable { throw new IllegalStateException(); } injector.injectChecked(queryContext.getExecutionControls(), "run-try-end", ForemanException.class); - } catch (final OutOfMemoryException e) { - queryStateProcessor.moveToState(QueryState.FAILED, UserException.memoryError(e).build(logger)); } catch (final ForemanException e) { queryStateProcessor.moveToState(QueryState.FAILED, e); - } catch (AssertionError | Exception ex) { - queryStateProcessor.moveToState(QueryState.FAILED, - new ForemanException("Unexpected exception during fragment initialization: " + ex.getMessage(), ex)); - } catch (final OutOfMemoryError e) { - if ("Direct buffer memory".equals(e.getMessage())) { - queryStateProcessor.moveToState(QueryState.FAILED, UserException.resourceError(e).message("One or more nodes ran out of memory while executing the query.").build(logger)); + } catch (final OutOfMemoryError | OutOfMemoryException e) { + if (FailureUtils.isDirectMemoryOOM(e)) { + queryStateProcessor.moveToState(QueryState.FAILED, UserException.memoryError(e).build(logger)); } else { /* * FragmentExecutors use a DrillbitStatusListener to watch out for the death of their query's Foreman. So, if we * die here, they should get notified about that, and cancel themselves; we don't have to attempt to notify * them, which might not work under these conditions. */ - CatastrophicFailure.exit(e, "Unable to handle out of memory condition in Foreman.", -1); + FailureUtils.unrecoverableFailure(e, "Unable to handle out of memory condition in Foreman.", EXIT_CODE_HEAP_OOM); } + } catch (AssertionError | Exception ex) { + queryStateProcessor.moveToState(QueryState.FAILED, + new ForemanException("Unexpected exception during fragment initialization: " + ex.getMessage(), ex)); } finally { // restore the thread's original name currentThread.setName(originalName); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java index 48d8a05..a745bf3 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java @@ -25,7 +25,6 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import org.apache.drill.common.CatastrophicFailure; import org.apache.drill.common.DeferredException; import org.apache.drill.common.EventProcessor; import org.apache.drill.common.exceptions.UserException; @@ -43,12 +42,15 @@ import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint; import org.apache.drill.exec.proto.ExecProtos.FragmentHandle; import org.apache.drill.exec.proto.UserBitShared.FragmentState; import org.apache.drill.exec.proto.helper.QueryIdHelper; +import org.apache.drill.exec.server.FailureUtils; import org.apache.drill.exec.testing.ControlsInjector; import org.apache.drill.exec.testing.ControlsInjectorFactory; import org.apache.drill.exec.util.ImpersonationUtil; import org.apache.drill.exec.work.foreman.DrillbitStatusListener; import org.apache.hadoop.security.UserGroupInformation; +import static org.apache.drill.exec.server.FailureUtils.EXIT_CODE_HEAP_OOM; + /** * <h2>Overview</h2> * <p> @@ -300,11 +302,11 @@ public class FragmentExecutor implements Runnable { }); } catch (OutOfMemoryError | OutOfMemoryException e) { - if (!(e instanceof OutOfMemoryError) || "Direct buffer memory".equals(e.getMessage())) { + if (FailureUtils.isDirectMemoryOOM(e)) { fail(UserException.memoryError(e).build(logger)); } else { - // we have a heap out of memory error. The JVM in unstable, exit. - CatastrophicFailure.exit(e, "Unable to handle out of memory condition in FragmentExecutor.", -2); + // we have a heap out of memory error. The JVM is unstable, exit. + FailureUtils.unrecoverableFailure(e, "Unable to handle out of memory condition in FragmentExecutor.", EXIT_CODE_HEAP_OOM); } } catch (InterruptedException e) { // Swallow interrupted exceptions since we intentionally interrupt the root when cancelling a query diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/server/TestFailureUtils.java b/exec/java-exec/src/test/java/org/apache/drill/exec/server/TestFailureUtils.java new file mode 100644 index 0000000..6736b46 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/server/TestFailureUtils.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.server; + +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +import static org.apache.drill.exec.server.FailureUtils.DIRECT_MEMORY_OOM_MESSAGE; + +public class TestFailureUtils { + @Test + public void testIsDirectMemoryOOM() { + Assert.assertTrue(FailureUtils.isDirectMemoryOOM(new OutOfMemoryException())); + Assert.assertFalse(FailureUtils.isDirectMemoryOOM(new OutOfMemoryError())); + Assert.assertFalse(FailureUtils.isDirectMemoryOOM(new OutOfMemoryError("Heap went boom."))); + Assert.assertTrue(FailureUtils.isDirectMemoryOOM(new OutOfMemoryError(DIRECT_MEMORY_OOM_MESSAGE))); + } + + @Test + public void testIsHeapOOM() { + Assert.assertTrue(FailureUtils.isHeapOOM(new OutOfMemoryError())); + Assert.assertTrue(FailureUtils.isHeapOOM(new OutOfMemoryError("Heap went boom."))); + Assert.assertFalse(FailureUtils.isHeapOOM(new OutOfMemoryError(DIRECT_MEMORY_OOM_MESSAGE))); + Assert.assertFalse(FailureUtils.isHeapOOM(new IOException())); + Assert.assertFalse(FailureUtils.isHeapOOM(new NullPointerException())); + Assert.assertFalse(FailureUtils.isHeapOOM(new OutOfMemoryException())); + } +} diff --git a/exec/memory/base/src/main/java/org/apache/drill/exec/exception/OutOfMemoryException.java b/exec/memory/base/src/main/java/org/apache/drill/exec/exception/OutOfMemoryException.java index 01cf6de..f19a0d1 100644 --- a/exec/memory/base/src/main/java/org/apache/drill/exec/exception/OutOfMemoryException.java +++ b/exec/memory/base/src/main/java/org/apache/drill/exec/exception/OutOfMemoryException.java @@ -17,7 +17,9 @@ */ package org.apache.drill.exec.exception; - +/** + * This is thrown in various cases when Drill cannot allocate Direct Memory. <b>Note: </b> This does <b>NOT</b> get thrown when we run out of heap memory. + */ public class OutOfMemoryException extends RuntimeException { private static final long serialVersionUID = -6858052345185793382L; -- To stop receiving notification emails like this one, please contact [email protected].
