This is an automated email from the ASF dual-hosted git repository.
sanpwc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ignite-3.git
The following commit(s) were added to refs/heads/main by this push:
new 3ac90222b76 IGNITE-27215 Fix Critical system error caused by
HandshakeException on cluster stop (#7117)
3ac90222b76 is described below
commit 3ac90222b769a80a85ff999e1de897bf0b7278ff
Author: Denis Chudov <[email protected]>
AuthorDate: Mon Dec 1 17:56:27 2025 +0400
IGNITE-27215 Fix Critical system error caused by HandshakeException on
cluster stop (#7117)
---
.../ignite/internal/util/ExceptionUtilsTest.java | 19 +++++++++++++++++++
.../replicator/PartitionReplicaLifecycleManager.java | 19 ++++++++++++++++++-
2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/util/ExceptionUtilsTest.java b/modules/core/src/test/java/org/apache/ignite/internal/util/ExceptionUtilsTest.java
index 947d77eb0c0..c5336e82d74 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/util/ExceptionUtilsTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/util/ExceptionUtilsTest.java
@@ -21,7 +21,11 @@ import static org.apache.ignite.internal.util.ExceptionUtils.hasCause;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.isA;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import java.io.IOException;
+import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutionException;
import org.junit.jupiter.api.Test;
@@ -39,4 +43,19 @@ class ExceptionUtilsTest {
assertThat(copied, isA(ExceptionInInitializerError.class));
assertThat(hasCause(copied, IllegalArgumentException.class), is(true));
}
+
+ @Test
+ void hasCauseTest() {
+ var e0 = new IllegalStateException();
+ var e1 = new IllegalArgumentException(e0);
+ var e2 = new IOException(e1);
+ var e3 = new CompletionException(e2);
+
+ assertTrue(hasCause(e3, IllegalStateException.class));
+ assertTrue(hasCause(e3, IllegalArgumentException.class));
+ assertTrue(hasCause(e3, IOException.class));
+ assertTrue(hasCause(e3, CompletionException.class));
+
+ assertFalse(hasCause(e3, NullPointerException.class));
+ }
}
diff --git a/modules/partition-replicator/src/main/java/org/apache/ignite/internal/partition/replicator/PartitionReplicaLifecycleManager.java b/modules/partition-replicator/src/main/java/org/apache/ignite/internal/partition/replicator/PartitionReplicaLifecycleManager.java
index 17fb5bba0a9..6f5a0adb06a 100644
--- a/modules/partition-replicator/src/main/java/org/apache/ignite/internal/partition/replicator/PartitionReplicaLifecycleManager.java
+++ b/modules/partition-replicator/src/main/java/org/apache/ignite/internal/partition/replicator/PartitionReplicaLifecycleManager.java
@@ -73,6 +73,7 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
+import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executor;
@@ -124,6 +125,7 @@ import org.apache.ignite.internal.metastorage.WatchListener;
import org.apache.ignite.internal.metastorage.dsl.Condition;
import org.apache.ignite.internal.metastorage.dsl.Operation;
import org.apache.ignite.internal.network.InternalClusterNode;
+import org.apache.ignite.internal.network.RecipientLeftException;
import org.apache.ignite.internal.network.TopologyService;
import org.apache.ignite.internal.partition.replicator.ZoneResourcesManager.ZonePartitionResources;
import org.apache.ignite.internal.partition.replicator.raft.RaftTableProcessor;
@@ -167,6 +169,7 @@ import org.apache.ignite.internal.util.Cursor;
import org.apache.ignite.internal.util.IgniteSpinBusyLock;
import org.apache.ignite.internal.util.IgniteUtils;
import org.apache.ignite.internal.util.PendingComparableValuesTracker;
+import org.apache.ignite.internal.util.TrackerClosedException;
import org.jetbrains.annotations.Nullable;
import org.jetbrains.annotations.TestOnly;
import org.jetbrains.annotations.VisibleForTesting;
@@ -1696,10 +1699,24 @@ public class PartitionReplicaLifecycleManager extends
fut.get();
} catch (Throwable e) {
- failureProcessor.process(new FailureContext(e, "Unable to clean up zones resources"));
+ if (!isExpectedThrowableDuringResourcesStop(e)) {
+ failureProcessor.process(new FailureContext(e, "Unable to clean up zones resources"));
+ }
}
}
+ private static boolean isExpectedThrowableDuringResourcesStop(Throwable throwable) {
+ return hasCause(
+ throwable,
+ NodeStoppingException.class,
+ ComponentStoppingException.class,
+ TrackerClosedException.class,
+ CancellationException.class,
+ // Is possible during cluster stop due to "stale" nodes (nodes that already left the cluster).
+ RecipientLeftException.class
+ );
+ }
+
private void printPartitionState(Stream<ZonePartitionId> partitionIds) {
List<ZonePartitionId> nonStoppedPartitions = partitionIds
.filter(partId -> replicaMgr.replica(partId) != null)