This is an automated email from the ASF dual-hosted git repository.
ibessonov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ignite-3.git
The following commit(s) were added to refs/heads/main by this push:
new 8981fa201f1 IGNITE-27324 Stop the node in a separate thread in failure handler (#7235)
8981fa201f1 is described below
commit 8981fa201f194df1ede23fa42c5b3e9a17a8a0d9
Author: Ivan Bessonov <[email protected]>
AuthorDate: Mon Dec 15 17:44:25 2025 +0300
IGNITE-27324 Stop the node in a separate thread in failure handler (#7235)
---
.../failure/handlers/FailureHandlerTest.java | 2 +-
.../ignite/internal/failure/FailureManager.java | 13 +++++--
.../failure/handlers/StopNodeFailureHandler.java | 18 ++++++++--
.../handlers/StopNodeOrHaltFailureHandler.java | 42 ++++++++++++++++------
.../internal/failure/FailureProcessorTest.java | 4 +--
.../FailureProcessorThreadDumpThrottlingTest.java | 4 +--
.../org/apache/ignite/internal/app/IgniteImpl.java | 2 +-
7 files changed, 64 insertions(+), 21 deletions(-)
diff --git a/modules/failure-handler/src/integrationTest/java/org/apache/ignite/internal/failure/handlers/FailureHandlerTest.java b/modules/failure-handler/src/integrationTest/java/org/apache/ignite/internal/failure/handlers/FailureHandlerTest.java
index ae36bb00863..5c7d2c46f82 100755
--- a/modules/failure-handler/src/integrationTest/java/org/apache/ignite/internal/failure/handlers/FailureHandlerTest.java
+++ b/modules/failure-handler/src/integrationTest/java/org/apache/ignite/internal/failure/handlers/FailureHandlerTest.java
@@ -46,7 +46,7 @@ public class FailureHandlerTest extends ClusterPerTestIntegrationTest {
@Test
void testStopNodeFailureHandler() {
- testFailureHandler(node -> new StopNodeFailureHandler(node::shutdown));
+ testFailureHandler(node -> new StopNodeFailureHandler("test-node", node::shutdown));
}
@Test
diff --git
a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureManager.java
b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureManager.java
index 1927a74c6ec..a9e9f0741a4 100644
---
a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureManager.java
+++
b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureManager.java
@@ -65,6 +65,9 @@ public class FailureManager implements FailureProcessor,
IgniteComponent {
private static final String IGNORED_FAILURE_LOG_MSG = "Possible failure
suppressed according to a configured handler "
+ "[hnd={}, failureCtx={}, failureCtxId={}]";
+ /** Ignite node name. */
+ private final String nodeName;
+
/** Failure processor configuration. */
private final FailureProcessorConfiguration configuration;
@@ -100,7 +103,9 @@ public class FailureManager implements FailureProcessor,
IgniteComponent {
*
* @param handler Handler.
*/
+ @TestOnly
public FailureManager(FailureHandler handler) {
+ this.nodeName = "test-node";
this.nodeStopper = () -> {};
this.handler = handler;
this.configuration = null;
@@ -109,10 +114,12 @@ public class FailureManager implements FailureProcessor,
IgniteComponent {
/**
* Creates a new instance of a failure processor.
*
+ * @param nodeName Node name.
* @param nodeStopper Node stopper.
* @param configuration Failure processor configuration.
*/
- public FailureManager(NodeStopper nodeStopper, FailureProcessorConfiguration configuration) {
+ public FailureManager(String nodeName, NodeStopper nodeStopper, FailureProcessorConfiguration configuration) {
+ this.nodeName = nodeName;
this.nodeStopper = nodeStopper;
this.configuration = configuration;
}
@@ -263,11 +270,11 @@ public class FailureManager implements FailureProcessor,
IgniteComponent {
break;
case StopNodeFailureHandlerConfigurationSchema.TYPE:
- hnd = new StopNodeFailureHandler(nodeStopper);
+ hnd = new StopNodeFailureHandler(nodeName, nodeStopper);
break;
case StopNodeOrHaltFailureHandlerConfigurationSchema.TYPE:
- hnd = new StopNodeOrHaltFailureHandler(nodeStopper, (StopNodeOrHaltFailureHandlerView) handlerView);
+ hnd = new StopNodeOrHaltFailureHandler(nodeName, nodeStopper, (StopNodeOrHaltFailureHandlerView) handlerView);
break;
default:
diff --git
a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeFailureHandler.java
b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeFailureHandler.java
index 8f3f349ec02..e97982e9c12 100644
---
a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeFailureHandler.java
+++
b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeFailureHandler.java
@@ -19,6 +19,10 @@ package org.apache.ignite.internal.failure.handlers;
import org.apache.ignite.internal.failure.FailureContext;
import org.apache.ignite.internal.failure.NodeStopper;
+import org.apache.ignite.internal.logger.IgniteLogger;
+import org.apache.ignite.internal.logger.Loggers;
+import org.apache.ignite.internal.thread.IgniteThreadFactory;
+import org.apache.ignite.internal.thread.ThreadOperation;
import org.apache.ignite.internal.tostring.IgniteToStringExclude;
import org.apache.ignite.internal.tostring.S;
@@ -26,16 +30,26 @@ import org.apache.ignite.internal.tostring.S;
* Handler will stop node in case of critical error using provided {@link
NodeStopper}.
*/
public class StopNodeFailureHandler extends AbstractFailureHandler {
+ private static final IgniteLogger LOG = Loggers.forClass(StopNodeFailureHandler.class);
+
+ /** Ignite node name. */
+ private final String nodeName;
+
@IgniteToStringExclude
private final NodeStopper nodeStopper;
- public StopNodeFailureHandler(NodeStopper nodeStopper) {
+ public StopNodeFailureHandler(String nodeName, NodeStopper nodeStopper) {
+ this.nodeName = nodeName;
this.nodeStopper = nodeStopper;
}
@Override
protected boolean handle(FailureContext failureCtx) {
- nodeStopper.stopNode();
+ IgniteThreadFactory threadFactory = IgniteThreadFactory.create(nodeName, "node-stopper", true, LOG, ThreadOperation.values());
+
+ Thread nodeStopperThread = threadFactory.newThread(nodeStopper::stopNode);
+
+ nodeStopperThread.start();
return true;
}
diff --git
a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeOrHaltFailureHandler.java
b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeOrHaltFailureHandler.java
index 267deea3f8f..43bff07e009 100644
---
a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeOrHaltFailureHandler.java
+++
b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeOrHaltFailureHandler.java
@@ -24,9 +24,11 @@ import org.apache.ignite.internal.failure.NodeStopper;
import
org.apache.ignite.internal.failure.handlers.configuration.StopNodeOrHaltFailureHandlerView;
import org.apache.ignite.internal.logger.IgniteLogger;
import org.apache.ignite.internal.logger.Loggers;
-import org.apache.ignite.internal.thread.LogUncaughtExceptionHandler;
+import org.apache.ignite.internal.thread.IgniteThreadFactory;
+import org.apache.ignite.internal.thread.ThreadOperation;
import org.apache.ignite.internal.tostring.IgniteToStringExclude;
import org.apache.ignite.internal.tostring.S;
+import org.jetbrains.annotations.TestOnly;
/**
* Handler will try to stop node if {@code tryStop} value is {@code true}.
@@ -42,6 +44,9 @@ public class StopNodeOrHaltFailureHandler extends
AbstractFailureHandler {
*/
private static final int KILL_EXIT_CODE = 130;
+ /** Ignite node name. */
+ private String nodeName;
+
/** Node stopper. */
@IgniteToStringExclude
private final NodeStopper nodeStopper;
@@ -59,6 +64,7 @@ public class StopNodeOrHaltFailureHandler extends
AbstractFailureHandler {
* @param tryStop Try stop.
* @param timeout Stop node timeout in milliseconds.
*/
+ @TestOnly
public StopNodeOrHaltFailureHandler(NodeStopper nodeStopper, boolean
tryStop, long timeout) {
this.nodeStopper = nodeStopper;
this.tryStop = tryStop;
@@ -68,10 +74,12 @@ public class StopNodeOrHaltFailureHandler extends
AbstractFailureHandler {
/**
* Creates a new instance of a failure processor.
*
+ * @param nodeName Node name.
* @param nodeStopper Node stopper.
* @param view Configuration view.
*/
- public StopNodeOrHaltFailureHandler(NodeStopper nodeStopper, StopNodeOrHaltFailureHandlerView view) {
+ public StopNodeOrHaltFailureHandler(String nodeName, NodeStopper nodeStopper, StopNodeOrHaltFailureHandlerView view) {
+ this.nodeName = nodeName;
this.nodeStopper = nodeStopper;
tryStop = view.tryStop();
timeout = view.timeoutMillis();
@@ -82,18 +90,33 @@ public class StopNodeOrHaltFailureHandler extends
AbstractFailureHandler {
if (tryStop) {
CountDownLatch latch = new CountDownLatch(1);
- Thread stopperThread = new Thread(
+ IgniteThreadFactory stopThreadFactory = IgniteThreadFactory.create(
+ nodeName,
+ "node-stopper",
+ true,
+ LOG,
+ ThreadOperation.values()
+ );
+
+ Thread stopperThread = stopThreadFactory.newThread(
() -> {
nodeStopper.stopNode();
latch.countDown();
- },
- "node-stopper"
+ }
);
- stopperThread.setUncaughtExceptionHandler(new LogUncaughtExceptionHandler(LOG));
+
stopperThread.start();
- Thread haltOnStopTimeoutThread = new Thread(
+ IgniteThreadFactory haltThreadFactory = IgniteThreadFactory.create(
+ nodeName,
+ "jvm-halt-on-stop-timeout",
+ true,
+ LOG,
+ ThreadOperation.values()
+ );
+
+ Thread haltOnStopTimeoutThread = haltThreadFactory.newThread(
() -> {
try {
if (!latch.await(timeout, TimeUnit.MILLISECONDS)) {
@@ -102,10 +125,9 @@ public class StopNodeOrHaltFailureHandler extends
AbstractFailureHandler {
} catch (InterruptedException e) {
// No-op.
}
- },
- "jvm-halt-on-stop-timeout"
+ }
);
- haltOnStopTimeoutThread.setUncaughtExceptionHandler(new LogUncaughtExceptionHandler(LOG));
+
haltOnStopTimeoutThread.start();
} else {
Runtime.getRuntime().halt(KILL_EXIT_CODE);
diff --git
a/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorTest.java
b/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorTest.java
index 4ff71bb6703..7ffb9b87861 100644
---
a/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorTest.java
+++
b/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorTest.java
@@ -94,7 +94,7 @@ class FailureProcessorTest extends BaseIgniteAbstractTest {
@Test
public void testDefaultFailureHandlerConfiguration() {
- FailureManager failureManager = new FailureManager(() -> {},
failureProcessorConfiguration);
+ FailureManager failureManager = new FailureManager("test-node", () ->
{}, failureProcessorConfiguration);
try {
assertThat(failureManager.startAsync(new ComponentContext()),
willSucceedFast());
@@ -114,7 +114,7 @@ class FailureProcessorTest extends BaseIgniteAbstractTest {
@Test
public void testFailureProcessorReconfiguration() {
- FailureManager failureManager = new FailureManager(() -> {},
failureProcessorConfiguration);
+ FailureManager failureManager = new FailureManager("test-node", () ->
{}, failureProcessorConfiguration);
try {
assertThat(failureManager.startAsync(new ComponentContext()),
willSucceedFast());
diff --git
a/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorThreadDumpThrottlingTest.java
b/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorThreadDumpThrottlingTest.java
index 963cb922bba..8fc7faebf6a 100644
---
a/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorThreadDumpThrottlingTest.java
+++
b/modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorThreadDumpThrottlingTest.java
@@ -72,7 +72,7 @@ public class FailureProcessorThreadDumpThrottlingTest extends
BaseIgniteAbstract
logInspector.start();
try {
- FailureManager failureManager = new FailureManager(() -> {},
disabledThreadDumpConfiguration);
+ FailureManager failureManager = new FailureManager("test-node", ()
-> {}, disabledThreadDumpConfiguration);
try {
assertThat(failureManager.startAsync(new ComponentContext()),
willSucceedFast());
@@ -217,7 +217,7 @@ public class FailureProcessorThreadDumpThrottlingTest
extends BaseIgniteAbstract
* Creates a new instance of {@link FailureManager} with the given
configuration and runs the test represented by {@code test} closure.
*/
static void testFailureProcessing(FailureProcessorConfiguration
configuration, Consumer<FailureProcessor> test) {
- FailureManager failureManager = new FailureManager(() -> {},
configuration);
+ FailureManager failureManager = new FailureManager("test-node", () ->
{}, configuration);
try {
assertThat(failureManager.startAsync(new ComponentContext()),
willSucceedFast());
diff --git
a/modules/runner/src/main/java/org/apache/ignite/internal/app/IgniteImpl.java
b/modules/runner/src/main/java/org/apache/ignite/internal/app/IgniteImpl.java
index 0b51be05a1f..89a288788c1 100644
---
a/modules/runner/src/main/java/org/apache/ignite/internal/app/IgniteImpl.java
+++
b/modules/runner/src/main/java/org/apache/ignite/internal/app/IgniteImpl.java
@@ -598,7 +598,7 @@ public class IgniteImpl implements Ignite {
FailureProcessorConfiguration failureProcessorConfiguration =
nodeConfigRegistry.getConfiguration(
FailureProcessorExtensionConfiguration.KEY).failureHandler();
- failureManager = new FailureManager(node::shutdown,
failureProcessorConfiguration);
+ failureManager = new FailureManager(name, node::shutdown,
failureProcessorConfiguration);
SystemLocalConfiguration systemConfiguration =
nodeConfigRegistry.getConfiguration(SystemLocalExtensionConfiguration.KEY).system();