This is an automated email from the ASF dual-hosted git repository.

sanpwc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ignite-3.git


The following commit(s) were added to refs/heads/main by this push:
     new da337a2b8a7 IGNITE-26421 Make node stop time unbounded (#6933)
da337a2b8a7 is described below

commit da337a2b8a78dca8995a2899603c316c410f3505
Author: Denis Chudov <[email protected]>
AuthorDate: Fri Nov 21 11:25:54 2025 +0400

    IGNITE-26421 Make node stop time unbounded (#6933)
---
 .../apache/ignite/internal/thread/ThreadUtils.java |  3 +-
 .../internal/tostring/IgniteToStringBuilder.java   |  2 +-
 .../ignite/internal/failure/FailureManager.java    |  4 +--
 .../PartitionReplicaLifecycleManager.java          | 38 ++++++++++++++++++----
 .../ignite/internal/replicator/ReplicaManager.java |  1 +
 5 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/modules/core/src/main/java/org/apache/ignite/internal/thread/ThreadUtils.java b/modules/core/src/main/java/org/apache/ignite/internal/thread/ThreadUtils.java
index 4a6e84417ae..f64f479a0dd 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/thread/ThreadUtils.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/thread/ThreadUtils.java
@@ -31,6 +31,7 @@ import java.util.HashSet;
 import java.util.Set;
 import org.apache.ignite.internal.lang.IgniteStringFormatter;
 import org.apache.ignite.internal.logger.IgniteLogger;
+import org.jetbrains.annotations.Nullable;
 
 /**
  * This class contains utility methods for working with threads.
@@ -61,7 +62,7 @@ public class ThreadUtils {
      * @param isErrorLevel {@code true} if thread dump must be printed with ERROR logging level,
      *      {@code false} if thread dump must be printed with WARN logging level.
      */
-    public static void dumpThreads(IgniteLogger log, String message, boolean isErrorLevel) {
+    public static void dumpThreads(IgniteLogger log, @Nullable String message, boolean isErrorLevel) {
         ThreadMXBean mxBean = ManagementFactory.getThreadMXBean();
 
         Set<Long> deadlockedThreadsIds = getDeadlockedThreadIds(mxBean);
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/tostring/IgniteToStringBuilder.java b/modules/core/src/main/java/org/apache/ignite/internal/tostring/IgniteToStringBuilder.java
index 3a195678846..e86b1c95f83 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/tostring/IgniteToStringBuilder.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/tostring/IgniteToStringBuilder.java
@@ -83,7 +83,7 @@ public class IgniteToStringBuilder {
     private static final Object[] EMPTY_ARRAY = new Object[0];
 
     /** Max collection elements to be written. */
-    private static final int COLLECTION_LIMIT = 100;
+    public static final int COLLECTION_LIMIT = 100;
 
     /** {@link #sensitiveDataPolicy} var handle. */
     private static final VarHandle SENSITIVE_DATA_POLICY;
diff --git a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureManager.java b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureManager.java
index 8d0b68a985e..1927a74c6ec 100644
--- a/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureManager.java
+++ b/modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureManager.java
@@ -201,9 +201,9 @@ public class FailureManager implements FailureProcessor, IgniteComponent {
         }
 
         if (dumpThreadsOnFailure && !throttleThreadDump(failureCtx)) {
-            String ctxId = format(" [failureCtxId={}]", failureCtx.id());
+            String ctxIdMsg = format(" [failureCtxId={}]", failureCtx.id());
 
-            ThreadUtils.dumpThreads(LOG, ctxId, !handler.ignoredFailureTypes().contains(failureCtx.type()));
+            ThreadUtils.dumpThreads(LOG, ctxIdMsg, !handler.ignoredFailureTypes().contains(failureCtx.type()));
         }
 
         boolean invalidated = handler.onFailure(failureCtx);
diff --git a/modules/partition-replicator/src/main/java/org/apache/ignite/internal/partition/replicator/PartitionReplicaLifecycleManager.java b/modules/partition-replicator/src/main/java/org/apache/ignite/internal/partition/replicator/PartitionReplicaLifecycleManager.java
index c98a7a1ae03..f3fee7e95cc 100644
--- a/modules/partition-replicator/src/main/java/org/apache/ignite/internal/partition/replicator/PartitionReplicaLifecycleManager.java
+++ b/modules/partition-replicator/src/main/java/org/apache/ignite/internal/partition/replicator/PartitionReplicaLifecycleManager.java
@@ -21,6 +21,7 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 import static java.util.Collections.emptySet;
 import static java.util.concurrent.CompletableFuture.allOf;
 import static java.util.concurrent.CompletableFuture.completedFuture;
+import static java.util.concurrent.CompletableFuture.delayedExecutor;
 import static java.util.concurrent.CompletableFuture.failedFuture;
 import static java.util.concurrent.CompletableFuture.supplyAsync;
 import static java.util.function.Function.identity;
@@ -44,6 +45,7 @@ import static org.apache.ignite.internal.distributionzones.rebalance.ZoneRebalan
 import static org.apache.ignite.internal.hlc.HybridTimestamp.LOGICAL_TIME_BITS_SIZE;
 import static org.apache.ignite.internal.hlc.HybridTimestamp.hybridTimestamp;
 import static org.apache.ignite.internal.hlc.HybridTimestamp.nullableHybridTimestamp;
+import static org.apache.ignite.internal.lang.IgniteStringFormatter.format;
 import static org.apache.ignite.internal.metastorage.dsl.Conditions.notExists;
 import static org.apache.ignite.internal.metastorage.dsl.Operations.put;
 import static org.apache.ignite.internal.partition.replicator.LocalPartitionReplicaEvent.AFTER_REPLICA_DESTROYED;
@@ -54,6 +56,7 @@ import static org.apache.ignite.internal.partitiondistribution.Assignments.assig
 import static org.apache.ignite.internal.partitiondistribution.PartitionDistributionUtils.calculateAssignmentForPartition;
 import static org.apache.ignite.internal.partitiondistribution.PartitionDistributionUtils.calculateAssignments;
 import static org.apache.ignite.internal.raft.PeersAndLearners.fromAssignments;
+import static org.apache.ignite.internal.tostring.IgniteToStringBuilder.COLLECTION_LIMIT;
 import static org.apache.ignite.internal.util.ByteUtils.toByteArray;
 import static org.apache.ignite.internal.util.CompletableFutures.falseCompletedFuture;
 import static org.apache.ignite.internal.util.CompletableFutures.nullCompletedFuture;
@@ -72,7 +75,6 @@ import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executor;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.ScheduledExecutorService;
@@ -108,7 +110,6 @@ import org.apache.ignite.internal.hlc.HybridTimestamp;
 import org.apache.ignite.internal.lang.ByteArray;
 import org.apache.ignite.internal.lang.ComponentStoppingException;
 import org.apache.ignite.internal.lang.IgniteInternalException;
-import org.apache.ignite.internal.lang.IgniteStringFormatter;
 import org.apache.ignite.internal.lang.NodeStoppingException;
 import org.apache.ignite.internal.logger.IgniteLogger;
 import org.apache.ignite.internal.logger.Loggers;
@@ -154,6 +155,8 @@ import org.apache.ignite.internal.schema.SchemaManager;
 import org.apache.ignite.internal.schema.SchemaSyncService;
 import org.apache.ignite.internal.storage.DataStorageManager;
 import org.apache.ignite.internal.storage.engine.StorageEngine;
+import org.apache.ignite.internal.thread.ThreadUtils;
+import org.apache.ignite.internal.tostring.S;
 import org.apache.ignite.internal.tx.TxManager;
 import org.apache.ignite.internal.tx.storage.state.TxStatePartitionStorage;
 import org.apache.ignite.internal.tx.storage.state.rocksdb.TxStateRocksDbSharedStorage;
@@ -563,7 +566,7 @@ public class PartitionReplicaLifecycleManager extends
             int partitionCount,
             boolean onNodeRecovery
     ) {
-        assert stableAssignmentsForZone != null : IgniteStringFormatter.format("Zone has empty assignments [id={}].", zoneId);
+        assert stableAssignmentsForZone != null : format("Zone has empty assignments [id={}].", zoneId);
 
         Supplier<CompletableFuture<Void>> createZoneReplicationNodes = () -> inBusyLockAsync(busyLock, () -> {
             var partitionsStartFutures = new CompletableFuture<?>[stableAssignmentsForZone.size()];
@@ -1678,12 +1681,35 @@ public class PartitionReplicaLifecycleManager extends
                 .toArray(CompletableFuture[]::new);
 
         try {
-            allOf(stopPartitionsFuture).get(30, TimeUnit.SECONDS);
-        } catch (InterruptedException | ExecutionException | TimeoutException e) {
-            LOG.error("Unable to clean up zones resources", e);
+            CompletableFuture<Void> fut = allOf(stopPartitionsFuture);
+
+            delayedExecutor(30, TimeUnit.SECONDS, Runnable::run)
+                    .execute(() -> {
+                        if (!fut.isDone()) {
+                            printPartitionState(partitionIds);
+                        }
+                    });
+
+            fut.get();
+        } catch (Throwable e) {
+            failureProcessor.process(new FailureContext(e, "Unable to clean up zones resources"));
         }
     }
 
+    private void printPartitionState(Stream<ZonePartitionId> partitionIds) {
+        List<ZonePartitionId> nonStoppedPartitions = partitionIds
+                .filter(partId -> replicaMgr.replica(partId) != null)
+                .collect(toList());
+
+        int exceedLimit = nonStoppedPartitions.size() - COLLECTION_LIMIT;
+
+        String partitionsStr = "There are still some partitions that are being stopped: "
+                + S.toString(nonStoppedPartitions, (sb, e, i) -> sb.app(e).app(i < nonStoppedPartitions.size() - 1 ? ", " : ""))
+                + (exceedLimit > 0 ? format(" and {} more; ", exceedLimit) : "; ");
+
+        ThreadUtils.dumpThreads(LOG, partitionsStr, false);
+    }
+
     /**
      * Lock the zones replica list for any changes. {@link #hasLocalPartition(ZonePartitionId)} must be executed under this lock always.
      *
diff --git a/modules/replicator/src/main/java/org/apache/ignite/internal/replicator/ReplicaManager.java b/modules/replicator/src/main/java/org/apache/ignite/internal/replicator/ReplicaManager.java
index 1d92e8ff09f..ec8b2bbaeb8 100644
--- a/modules/replicator/src/main/java/org/apache/ignite/internal/replicator/ReplicaManager.java
+++ b/modules/replicator/src/main/java/org/apache/ignite/internal/replicator/ReplicaManager.java
@@ -784,6 +784,7 @@ public class ReplicaManager extends AbstractEventProducer<LocalReplicaEvent, Loc
      * @param replicationGroupId Table-Partition identifier.
      * @return replica if it was created or null otherwise.
      */
+    @Nullable
     public CompletableFuture<Replica> replica(ReplicationGroupId replicationGroupId) {
         return replicas.get(replicationGroupId);
     }

Reply via email to