Repository: ignite
Updated Branches:
  refs/heads/ignite-3212 [created] a05b55a4e


ignite-3212 Dump debug info on exchange timeout


Project: http://git-wip-us.apache.org/repos/asf/ignite/repo
Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/a05b55a4
Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/a05b55a4
Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/a05b55a4

Branch: refs/heads/ignite-3212
Commit: a05b55a4e52ecbc24fefabaf710d854215ac7f7f
Parents: 3c3ed05
Author: sboikov <[email protected]>
Authored: Wed Jun 1 10:33:01 2016 +0300
Committer: sboikov <[email protected]>
Committed: Wed Jun 1 10:40:43 2016 +0300

----------------------------------------------------------------------
 .../apache/ignite/IgniteSystemProperties.java   |  3 ++
 .../GridCachePartitionExchangeManager.java      | 43 +++++++++++++++++---
 .../GridDhtPartitionsExchangeFuture.java        | 42 +++++++++++++++++--
 3 files changed, 79 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ignite/blob/a05b55a4/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java 
b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
index 5f761a2..ab77378 100644
--- a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
+++ b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
@@ -377,6 +377,9 @@ public final class IgniteSystemProperties {
     /** Number of times pending cache objects will be dumped to the log in 
case of partition exchange timeout. */
     public static final String IGNITE_DUMP_PENDING_OBJECTS_THRESHOLD = 
"IGNITE_DUMP_PENDING_OBJECTS_THRESHOLD";
 
+    /** */
+    public static final String IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT = 
"IGNITE_DUMP_PENDING_OBJECTS_THRESHOLD";
+
     /** JDBC driver cursor remove delay. */
     public static final String IGNITE_JDBC_DRIVER_CURSOR_REMOVE_DELAY = 
"IGNITE_JDBC_DRIVER_CURSOR_RMV_DELAY";
 

http://git-wip-us.apache.org/repos/asf/ignite/blob/a05b55a4/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCachePartitionExchangeManager.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCachePartitionExchangeManager.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCachePartitionExchangeManager.java
index 78a8d6c..0c80423 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCachePartitionExchangeManager.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCachePartitionExchangeManager.java
@@ -33,12 +33,14 @@ import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ConcurrentSkipListMap;
 import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import org.apache.ignite.IgniteCheckedException;
+import org.apache.ignite.IgniteSystemProperties;
 import org.apache.ignite.cluster.ClusterNode;
 import org.apache.ignite.events.DiscoveryEvent;
 import org.apache.ignite.events.Event;
@@ -90,6 +92,7 @@ import org.jsr166.ConcurrentLinkedDeque8;
 
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static 
org.apache.ignite.IgniteSystemProperties.IGNITE_PRELOAD_RESEND_TIMEOUT;
+import static 
org.apache.ignite.IgniteSystemProperties.IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT;
 import static org.apache.ignite.IgniteSystemProperties.getLong;
 import static org.apache.ignite.events.EventType.EVT_CACHE_REBALANCE_STARTED;
 import static org.apache.ignite.events.EventType.EVT_NODE_FAILED;
@@ -1106,16 +1109,17 @@ public class GridCachePartitionExchangeManager<K, V> 
extends GridCacheSharedMana
     }
 
     /**
-     *
+     * @throws Exception If failed.
      */
-    public void dumpDebugInfo() {
+    public void dumpDebugInfo() throws Exception {
         dumpDebugInfo(null);
     }
 
     /**
-     *
+     * @param exchTopVer Optional current exchange topology version.
+     * @throws Exception If failed.
      */
-    public void dumpDebugInfo(@Nullable AffinityTopologyVersion exchTopVer) {
+    public void dumpDebugInfo(@Nullable AffinityTopologyVersion exchTopVer) 
throws Exception {
         U.warn(log, "Ready affinity version: " + readyTopVer.get());
 
         U.warn(log, "Last exchange future: " + lastInitializedFut);
@@ -1332,7 +1336,36 @@ public class GridCachePartitionExchangeManager<K, V> 
extends GridCacheSharedMana
 
                             exchFut.init();
 
-                            exchFut.get();
+                            int dumpedObjects = 0;
+
+                            while (true) {
+                                try {
+                                    exchFut.get(2 * 
cctx.gridConfig().getNetworkTimeout(), TimeUnit.MILLISECONDS);
+
+                                    break;
+                                }
+                                catch (IgniteFutureTimeoutCheckedException 
ignored) {
+                                    U.warn(log, "Failed to wait for partition 
map exchange [" +
+                                        "topVer=" + exchFut.topologyVersion() +
+                                        ", node=" + cctx.localNodeId() + "]. " 
+
+                                        "Dumping pending objects that might be 
the cause: ");
+
+                                    if (dumpedObjects < 
GridDhtPartitionsExchangeFuture.DUMP_PENDING_OBJECTS_THRESHOLD) {
+                                        try {
+                                            
dumpDebugInfo(exchFut.topologyVersion());
+                                        }
+                                        catch (Exception e) {
+                                            U.error(log, "Failed to dump debug 
information: " + e, e);
+                                        }
+
+                                        if 
(IgniteSystemProperties.getBoolean(IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT, 
true))
+                                            U.dumpThreads(log);
+
+                                        dumpedObjects++;
+                                    }
+                                }
+                            }
+
 
                             if (log.isDebugEnabled())
                                 log.debug("After waiting for exchange future 
[exchFut=" + exchFut + ", worker=" +

http://git-wip-us.apache.org/repos/asf/ignite/blob/a05b55a4/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionsExchangeFuture.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionsExchangeFuture.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionsExchangeFuture.java
index b497f58..5e81c22 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionsExchangeFuture.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionsExchangeFuture.java
@@ -73,6 +73,7 @@ import org.apache.ignite.lang.IgniteRunnable;
 import org.jetbrains.annotations.Nullable;
 import org.jsr166.ConcurrentHashMap8;
 
+import static 
org.apache.ignite.IgniteSystemProperties.IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT;
 import static org.apache.ignite.events.EventType.EVT_NODE_FAILED;
 import static org.apache.ignite.events.EventType.EVT_NODE_JOINED;
 import static org.apache.ignite.events.EventType.EVT_NODE_LEFT;
@@ -85,7 +86,7 @@ import static 
org.apache.ignite.internal.managers.communication.GridIoPolicy.SYS
 public class GridDhtPartitionsExchangeFuture extends 
GridFutureAdapter<AffinityTopologyVersion>
     implements Comparable<GridDhtPartitionsExchangeFuture>, 
GridDhtTopologyFuture {
     /** */
-    private static final int DUMP_PENDING_OBJECTS_THRESHOLD =
+    public static final int DUMP_PENDING_OBJECTS_THRESHOLD =
         
IgniteSystemProperties.getInteger(IgniteSystemProperties.IGNITE_DUMP_PENDING_OBJECTS_THRESHOLD,
 10);
 
     /** */
@@ -891,7 +892,15 @@ public class GridDhtPartitionsExchangeFuture extends 
GridFutureAdapter<AffinityT
         U.warn(log, "Failed to wait for partition release future [topVer=" + 
topologyVersion() +
             ", node=" + cctx.localNodeId() + "]. Dumping pending objects that 
might be the cause: ");
 
-        cctx.exchange().dumpDebugInfo(topologyVersion());
+        try {
+            cctx.exchange().dumpDebugInfo(topologyVersion());
+        }
+        catch (Exception e) {
+            U.error(log, "Failed to dump debug information: " + e, e);
+        }
+
+        if 
(IgniteSystemProperties.getBoolean(IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT, 
true))
+            U.dumpThreads(log);
     }
 
     /**
@@ -1541,6 +1550,7 @@ public class GridDhtPartitionsExchangeFuture extends 
GridFutureAdapter<AffinityT
                     try {
                         boolean crdChanged = false;
                         boolean allReceived = false;
+                        Set<UUID> reqFrom = null;
 
                         ClusterNode crd0;
 
@@ -1556,8 +1566,13 @@ public class GridDhtPartitionsExchangeFuture extends 
GridFutureAdapter<AffinityT
                                 crd = srvNodes.size() > 0 ? srvNodes.get(0) : 
null;
                             }
 
-                            if (crd != null && crd.isLocal() && rmvd)
-                                allReceived = remaining.isEmpty();
+                            if (crd != null && crd.isLocal()) {
+                                if (rmvd)
+                                    allReceived = remaining.isEmpty();
+
+                                if (crdChanged && !remaining.isEmpty())
+                                    reqFrom = new HashSet<>(remaining);
+                            }
 
                             crd0 = crd;
                         }
@@ -1588,6 +1603,25 @@ public class GridDhtPartitionsExchangeFuture extends 
GridFutureAdapter<AffinityT
                                 return;
                             }
 
+                            if (crdChanged && reqFrom != null) {
+                                GridDhtPartitionsSingleRequest req = new 
GridDhtPartitionsSingleRequest(exchId);
+
+                                for (UUID nodeId : reqFrom) {
+                                    try {
+                                        // It is possible that some nodes 
finished exchange with previous coordinator.
+                                        cctx.io().send(nodeId, req, 
SYSTEM_POOL);
+                                    }
+                                    catch (ClusterTopologyCheckedException e) {
+                                        if (log.isDebugEnabled())
+                                            log.debug("Node left during 
partition exchange [nodeId=" + nodeId +
+                                                ", exchId=" + exchId + ']');
+                                    }
+                                    catch (IgniteCheckedException e) {
+                                        U.error(log, "Failed to request 
partitions from node: " + nodeId, e);
+                                    }
+                                }
+                            }
+
                             for (Map.Entry<ClusterNode, 
GridDhtPartitionsSingleMessage> m : singleMsgs.entrySet())
                                 processMessage(m.getKey(), m.getValue());
                         }

Reply via email to