[ https://issues.apache.org/jira/browse/IGNITE-6433?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16287418#comment-16287418 ]
Alexandr Kuramshin commented on IGNITE-6433: -------------------------------------------- The following thread deadlock occurs frequently on node stop causing the whole test suite timeout {noformat} sys-#1524465%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest0% prio=10 tid=0x00007f082002a800 nid=0x6c13 waiting on condition [0x00007f0793ebc000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x0000000748e6d530> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:964) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1282) at java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:731) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.partitionMap(GridDhtPartitionTopologyImpl.java:1162) at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.createPartitionsFullMessage(GridCachePartitionExchangeManager.java:1045) at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.sendAllPartitions(GridCachePartitionExchangeManager.java:981) at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.refreshPartitions(GridCachePartitionExchangeManager.java:964) at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.access$2400(GridCachePartitionExchangeManager.java:131) at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ResendTimeoutObject$1.run(GridCachePartitionExchangeManager.java:2506) at org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:6695) at org.apache.ignite.internal.processors.closure.GridClosureProcessor$1.body(GridClosureProcessor.java:827) at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) sys-#1524152%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest0% prio=10 tid=0x00007f07bc1eb800 nid=0x6abf waiting on condition [0x00007f08d48ed000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x0000000748e6d9f8> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:867) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1197) at java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock(ReentrantReadWriteLock.java:945) at org.apache.ignite.internal.util.StripedCompositeReadWriteLock$WriteLock.lock0(StripedCompositeReadWriteLock.java:154) at org.apache.ignite.internal.util.StripedCompositeReadWriteLock$WriteLock.lock(StripedCompositeReadWriteLock.java:123) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.onEvicted(GridDhtPartitionTopologyImpl.java:2253) at org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader.onPartitionEvicted(GridDhtPreloader.java:461) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition.finishDestroy(GridDhtLocalPartition.java:731) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition.clearEvicting(GridDhtLocalPartition.java:699) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition.tryEvict(GridDhtLocalPartition.java:759) at org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader$3.call(GridDhtPreloader.java:593) at org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader$3.call(GridDhtPreloader.java:580) at org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:6639) at org.apache.ignite.internal.processors.closure.GridClosureProcessor$2.body(GridClosureProcessor.java:967) at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) exchange-worker-#1523966%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest0% prio=10 tid=0x00007f08bd16a800 nid=0x69f7 waiting on condition [0x00007f0ac4afb000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:315) at org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:177) at org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:140) at org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.detectLostPartitions(GridDhtPartitionTopologyImpl.java:1839) at org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.detectLostPartitions(GridDhtPartitionsExchangeFuture.java:2146) - locked <0x0000000747e37b58> (a java.lang.Object) at org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.finishExchangeOnCoordinator(GridDhtPartitionsExchangeFuture.java:2321) at org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onAllReceived(GridDhtPartitionsExchangeFuture.java:2208) at org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.distributedExchange(GridDhtPartitionsExchangeFuture.java:1031) at org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:651) at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2279) at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110) at java.lang.Thread.run(Thread.java:745) test-runner-#1523889%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest% prio=10 tid=0x00007f0ad9efa000 nid=0x69a6 waiting for monitor entry [0x00007f071e903000] java.lang.Thread.State: BLOCKED (on object monitor) at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.cancel(GridCachePartitionExchangeManager.java:2115) - waiting to lock <0x0000000747e37b58> (a java.lang.Object) at org.apache.ignite.internal.util.IgniteUtils.cancel(IgniteUtils.java:4672) at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.onKernalStop0(GridCachePartitionExchangeManager.java:668) at org.apache.ignite.internal.processors.cache.GridCacheSharedManagerAdapter.onKernalStop(GridCacheSharedManagerAdapter.java:120) at org.apache.ignite.internal.processors.cache.GridCacheProcessor.onKernalStop(GridCacheProcessor.java:913) at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2234) at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:2182) at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2511) - locked <0x0000000748b04c80> (a org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance) at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2474) at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:361) at org.apache.ignite.Ignition.stop(Ignition.java:224) at org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid(GridAbstractTest.java:1025) at org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1068) at org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1046) at org.apache.ignite.internal.processors.cache.distributed.GridCacheAbstractNodeRestartSelfTest.checkRestartWithTx(GridCacheAbstractNodeRestartSelfTest.java:854) at org.apache.ignite.internal.processors.cache.distributed.GridCacheAbstractNodeRestartSelfTest.testRestartWithTxFourNodesOneBackupsOffheapEvict(GridCacheAbstractNodeRestartSelfTest.java:452) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at junit.framework.TestCase.runTest(TestCase.java:176) at org.apache.ignite.testframework.junits.GridAbstractTest.runTestInternal(GridAbstractTest.java:2000) at org.apache.ignite.testframework.junits.GridAbstractTest.access$000(GridAbstractTest.java:132) at org.apache.ignite.testframework.junits.GridAbstractTest$5.run(GridAbstractTest.java:1915) at java.lang.Thread.run(Thread.java:745) {noformat} > We need to cancel eviction instead of waiting it when we should own a > partition because we had lost it > ------------------------------------------------------------------------------------------------------ > > Key: IGNITE-6433 > URL: https://issues.apache.org/jira/browse/IGNITE-6433 > Project: Ignite > Issue Type: Bug > Affects Versions: 2.1 > Reporter: Eduard Shangareev > > If PartitionLossPolicy.IGNORE is used and we have lost some partition which > would belong to us because of affinity assignment and its state was RENTING > then we would wait for its eviction completing what would hang cluster (the > time of exchange would significantly increase). > Instead of waiting we should cancel eviction and it's all. -- This message was sent by Atlassian JIRA (v6.4.14#64029)