This is an automated email from the ASF dual-hosted git repository. sk0x50 pushed a commit to branch ignite-20368 in repository https://gitbox.apache.org/repos/asf/ignite-3.git
The following commit(s) were added to refs/heads/ignite-20368 by this push: new 416abea88b IGNITE-20448 StopNodeFailureHandler and StopNodeOrHaltFailureHandler were implemented. (#2646) 416abea88b is described below commit 416abea88b319e45df0413194c0c9140be67b8db Author: Sergey Uttsel <utt...@gmail.com> AuthorDate: Wed Oct 4 16:53:26 2023 +0300 IGNITE-20448 StopNodeFailureHandler and StopNodeOrHaltFailureHandler were implemented. (#2646) --- modules/failure-handler/build.gradle | 1 + .../ignite/internal/failure/FailureProcessor.java | 1 + .../failure/{ => handlers}/FailureHandler.java | 5 +- .../StopNodeFailureHandler.java} | 29 ++--- .../handlers/StopNodeOrHaltFailureHandler.java | 118 +++++++++++++++++++++ .../internal/failure/FailureProcessorTest.java | 1 + modules/runner/build.gradle | 1 + .../failurehandler/FailureHandlerTest.java | 93 ++++++++++++++++ 8 files changed, 235 insertions(+), 14 deletions(-) diff --git a/modules/failure-handler/build.gradle b/modules/failure-handler/build.gradle index b6d8be625a..acc2d9b2ca 100644 --- a/modules/failure-handler/build.gradle +++ b/modules/failure-handler/build.gradle @@ -23,6 +23,7 @@ apply from: "$rootDir/buildscripts/java-test-fixtures.gradle" dependencies { implementation project(':ignite-core') + implementation project(':ignite-api') testImplementation libs.mockito.core testImplementation libs.mockito.junit diff --git a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureProcessor.java b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureProcessor.java index 16d4a6d9ad..35a7ce5bd8 100644 --- a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureProcessor.java +++ b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureProcessor.java @@ -17,6 +17,7 @@ package org.apache.ignite.internal.failure; +import org.apache.ignite.internal.failure.handlers.FailureHandler; import 
org.apache.ignite.internal.manager.IgniteComponent; /** diff --git a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureHandler.java b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/FailureHandler.java similarity index 88% copy from modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureHandler.java copy to modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/FailureHandler.java index f56aba3130..fab77aad99 100644 --- a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureHandler.java +++ b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/FailureHandler.java @@ -15,7 +15,10 @@ * limitations under the License. */ -package org.apache.ignite.internal.failure; +package org.apache.ignite.internal.failure.handlers; + +import org.apache.ignite.internal.failure.FailureContext; +import org.apache.ignite.internal.failure.FailureProcessor; /** * Provides facility to handle failures. diff --git a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureHandler.java b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeFailureHandler.java similarity index 58% rename from modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureHandler.java rename to modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeFailureHandler.java index f56aba3130..2b8f847d18 100644 --- a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureHandler.java +++ b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeFailureHandler.java @@ -15,20 +15,23 @@ * limitations under the License. 
*/ -package org.apache.ignite.internal.failure; +package org.apache.ignite.internal.failure.handlers; + +import org.apache.ignite.IgnitionManager; +import org.apache.ignite.internal.failure.FailureContext; /** - * Provides facility to handle failures. + * Handler will stop node in case of critical error using {@code IgnitionManager.stop(nodeName)} call. */ -public interface FailureHandler { - /** - * Handles failure occurred on {@code ignite} instance. - * Failure details is contained in {@code failureCtx}. - * Returns {@code true} if Ignite node must be invalidated by {@link FailureProcessor} after calling this method. - * - * @param nodeName Node name. - * @param failureCtx Failure context. - * @return Whether Ignite node must be invalidated or not. - */ - boolean onFailure(String nodeName, FailureContext failureCtx); +public class StopNodeFailureHandler implements FailureHandler { + /** {@inheritDoc} */ + @Override + public boolean onFailure(String nodeName, FailureContext failureCtx) { + new Thread( + () -> IgnitionManager.stop(nodeName), + "node-stopper" + ).start(); + + return true; + } } diff --git a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeOrHaltFailureHandler.java b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeOrHaltFailureHandler.java new file mode 100644 index 0000000000..a8ef031ff6 --- /dev/null +++ b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeOrHaltFailureHandler.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.failure.handlers; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.apache.ignite.IgnitionManager; +import org.apache.ignite.internal.failure.FailureContext; +import org.apache.ignite.internal.tostring.S; + +/** + * Handler will try to stop node if {@code tryStop} value is {@code true}. + * If node can't be stopped during provided {@code timeout} or {@code tryStop} value is {@code false} + * then JVM process will be terminated forcibly using {@code Runtime.getRuntime().halt()}. + */ +public class StopNodeOrHaltFailureHandler implements FailureHandler { + /** + * This is kill code that can be used by external tools, like Shell scripts, + * to auto-stop the Ignite JVM process without restarting. + */ + private static final int KILL_EXIT_CODE = 130; + + /** Try stop. */ + private final boolean tryStop; + + /** Timeout in milliseconds. */ + private final long timeout; + + /** + * Creates a new instance of a failure handler. + */ + public StopNodeOrHaltFailureHandler() { + this(false, 0); + } + + /** + * Creates a new instance of a failure handler. + * + * @param tryStop Try stop. + * @param timeout Stop node timeout in milliseconds. 
+ */ + public StopNodeOrHaltFailureHandler(boolean tryStop, long timeout) { + this.tryStop = tryStop; + this.timeout = timeout; + } + + /** {@inheritDoc} */ + @Override + public boolean onFailure(String nodeName, FailureContext failureCtx) { + if (tryStop) { + CountDownLatch latch = new CountDownLatch(1); + + new Thread( + () -> { + IgnitionManager.stop(nodeName); + + latch.countDown(); + }, + "node-stopper" + ).start(); + + new Thread( + () -> { + try { + if (!latch.await(timeout, TimeUnit.MILLISECONDS)) { + Runtime.getRuntime().halt(KILL_EXIT_CODE); + } + } catch (InterruptedException e) { + // No-op. + } + }, + "jvm-halt-on-stop-timeout" + ).start(); + } else { + Runtime.getRuntime().halt(KILL_EXIT_CODE); + } + + return true; + } + + /** + * Get stop node timeout in milliseconds. + * + * @return Stop node timeout. + */ + public long timeout() { + return timeout; + } + + /** + * Get try stop. + * + * @return Try stop. + */ + public boolean tryStop() { + return tryStop; + } + + /** {@inheritDoc} */ + @Override public String toString() { + return S.toString(StopNodeOrHaltFailureHandler.class, this); + } +} diff --git a/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorTest.java b/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorTest.java index 83ae8d971b..279e8c3618 100644 --- a/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorTest.java +++ b/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorTest.java @@ -23,6 +23,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import org.apache.ignite.internal.failure.handlers.FailureHandler; import org.apache.ignite.internal.testframework.BaseIgniteAbstractTest; import org.junit.jupiter.api.Test; diff --git a/modules/runner/build.gradle b/modules/runner/build.gradle index ffaf9eabf2..bc0d2a1809 
100644 --- a/modules/runner/build.gradle +++ b/modules/runner/build.gradle @@ -149,6 +149,7 @@ dependencies { integrationTestImplementation project(':ignite-placement-driver') integrationTestImplementation project(':ignite-distribution-zones') integrationTestImplementation project(':ignite-system-view') + integrationTestImplementation project(':ignite-failure-handler') integrationTestImplementation testFixtures(project(":ignite-api")) integrationTestImplementation testFixtures(project(':ignite-core')) integrationTestImplementation testFixtures(project(':ignite-configuration')) diff --git a/modules/runner/src/integrationTest/java/org/apache/ignite/internal/failurehandler/FailureHandlerTest.java b/modules/runner/src/integrationTest/java/org/apache/ignite/internal/failurehandler/FailureHandlerTest.java new file mode 100644 index 0000000000..bc5581a83b --- /dev/null +++ b/modules/runner/src/integrationTest/java/org/apache/ignite/internal/failurehandler/FailureHandlerTest.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.internal.failurehandler; + +import static org.apache.ignite.internal.testframework.IgniteTestUtils.assertThrowsWithCause; +import static org.apache.ignite.internal.testframework.IgniteTestUtils.testNodeName; +import static org.apache.ignite.internal.testframework.IgniteTestUtils.waitForCondition; +import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully; +import static org.hamcrest.MatcherAssert.assertThat; + +import java.nio.file.Path; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import org.apache.ignite.Ignite; +import org.apache.ignite.InitParameters; +import org.apache.ignite.internal.IgniteIntegrationTest; +import org.apache.ignite.internal.app.IgniteImpl; +import org.apache.ignite.internal.cluster.management.topology.api.LogicalTopologyService; +import org.apache.ignite.internal.failure.FailureContext; +import org.apache.ignite.internal.failure.FailureType; +import org.apache.ignite.internal.failure.handlers.FailureHandler; +import org.apache.ignite.internal.failure.handlers.StopNodeFailureHandler; +import org.apache.ignite.internal.failure.handlers.StopNodeOrHaltFailureHandler; +import org.apache.ignite.internal.lang.NodeStoppingException; +import org.apache.ignite.internal.testframework.TestIgnitionManager; +import org.apache.ignite.internal.testframework.WorkDirectory; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; + +/** + * Tests for failure handlers. 
+ */ +public class FailureHandlerTest extends IgniteIntegrationTest { + private static final int TIMEOUT_MILLIS = 10_000; + + @WorkDirectory + private Path workDir; + + @Test + void testStopNodeFailureHandler(TestInfo testInfo) throws Exception { + testFailureHandler(new StopNodeFailureHandler(), testInfo); + } + + @Test + void testStopNodeOrHaltFailureHandler(TestInfo testInfo) throws Exception { + testFailureHandler(new StopNodeOrHaltFailureHandler(true, TIMEOUT_MILLIS), testInfo); + } + + void testFailureHandler(FailureHandler hnd, TestInfo testInfo) throws Exception { + String nodeName = testNodeName(testInfo, 0); + + IgniteImpl node = startNode(nodeName); + + LogicalTopologyService topologyService = node.logicalTopologyService(); + + hnd.onFailure(nodeName, new FailureContext(FailureType.CRITICAL_ERROR, null)); + + waitForCondition(() -> topologyService.logicalTopologyOnLeader().isCompletedExceptionally(), TIMEOUT_MILLIS); + + assertThrowsWithCause(() -> topologyService.logicalTopologyOnLeader().join(), NodeStoppingException.class); + } + + private IgniteImpl startNode(String nodeName) { + CompletableFuture<Ignite> future = TestIgnitionManager.start(nodeName, null, workDir); + + InitParameters initParameters = InitParameters.builder() + .destinationNodeName(nodeName) + .metaStorageNodeNames(List.of(nodeName)) + .clusterName("cluster") + .build(); + + TestIgnitionManager.init(initParameters); + + assertThat(future, willCompleteSuccessfully()); + + return (IgniteImpl) future.join(); + } +}