[
https://issues.apache.org/jira/browse/IGNITE-6433?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16287418#comment-16287418
]
Alexandr Kuramshin commented on IGNITE-6433:
--------------------------------------------
The following thread deadlock occurs frequently on node stop, causing the whole
test suite to time out:
{noformat}
sys-#1524465%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest0%
prio=10 tid=0x00007f082002a800 nid=0x6c13 waiting on condition
[0x00007f0793ebc000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x0000000748e6d530> (a
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:964)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1282)
at
java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:731)
at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.partitionMap(GridDhtPartitionTopologyImpl.java:1162)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.createPartitionsFullMessage(GridCachePartitionExchangeManager.java:1045)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.sendAllPartitions(GridCachePartitionExchangeManager.java:981)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.refreshPartitions(GridCachePartitionExchangeManager.java:964)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.access$2400(GridCachePartitionExchangeManager.java:131)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ResendTimeoutObject$1.run(GridCachePartitionExchangeManager.java:2506)
at
org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:6695)
at
org.apache.ignite.internal.processors.closure.GridClosureProcessor$1.body(GridClosureProcessor.java:827)
at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
sys-#1524152%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest0%
prio=10 tid=0x00007f07bc1eb800 nid=0x6abf waiting on condition
[0x00007f08d48ed000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x0000000748e6d9f8> (a
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:867)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1197)
at
java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock(ReentrantReadWriteLock.java:945)
at
org.apache.ignite.internal.util.StripedCompositeReadWriteLock$WriteLock.lock0(StripedCompositeReadWriteLock.java:154)
at
org.apache.ignite.internal.util.StripedCompositeReadWriteLock$WriteLock.lock(StripedCompositeReadWriteLock.java:123)
at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.onEvicted(GridDhtPartitionTopologyImpl.java:2253)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader.onPartitionEvicted(GridDhtPreloader.java:461)
at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition.finishDestroy(GridDhtLocalPartition.java:731)
at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition.clearEvicting(GridDhtLocalPartition.java:699)
at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition.tryEvict(GridDhtLocalPartition.java:759)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader$3.call(GridDhtPreloader.java:593)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader$3.call(GridDhtPreloader.java:580)
at
org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:6639)
at
org.apache.ignite.internal.processors.closure.GridClosureProcessor$2.body(GridClosureProcessor.java:967)
at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
exchange-worker-#1523966%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest0%
prio=10 tid=0x00007f08bd16a800 nid=0x69f7 waiting on condition
[0x00007f0ac4afb000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:315)
at
org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:177)
at
org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:140)
at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.detectLostPartitions(GridDhtPartitionTopologyImpl.java:1839)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.detectLostPartitions(GridDhtPartitionsExchangeFuture.java:2146)
- locked <0x0000000747e37b58> (a java.lang.Object)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.finishExchangeOnCoordinator(GridDhtPartitionsExchangeFuture.java:2321)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onAllReceived(GridDhtPartitionsExchangeFuture.java:2208)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.distributedExchange(GridDhtPartitionsExchangeFuture.java:1031)
at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:651)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2279)
at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
at java.lang.Thread.run(Thread.java:745)
test-runner-#1523889%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest%
prio=10 tid=0x00007f0ad9efa000 nid=0x69a6 waiting for monitor entry
[0x00007f071e903000]
java.lang.Thread.State: BLOCKED (on object monitor)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.cancel(GridCachePartitionExchangeManager.java:2115)
- waiting to lock <0x0000000747e37b58> (a java.lang.Object)
at
org.apache.ignite.internal.util.IgniteUtils.cancel(IgniteUtils.java:4672)
at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.onKernalStop0(GridCachePartitionExchangeManager.java:668)
at
org.apache.ignite.internal.processors.cache.GridCacheSharedManagerAdapter.onKernalStop(GridCacheSharedManagerAdapter.java:120)
at
org.apache.ignite.internal.processors.cache.GridCacheProcessor.onKernalStop(GridCacheProcessor.java:913)
at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2234)
at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:2182)
at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2511)
- locked <0x0000000748b04c80> (a
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance)
at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2474)
at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:361)
at org.apache.ignite.Ignition.stop(Ignition.java:224)
at
org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid(GridAbstractTest.java:1025)
at
org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1068)
at
org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1046)
at
org.apache.ignite.internal.processors.cache.distributed.GridCacheAbstractNodeRestartSelfTest.checkRestartWithTx(GridCacheAbstractNodeRestartSelfTest.java:854)
at
org.apache.ignite.internal.processors.cache.distributed.GridCacheAbstractNodeRestartSelfTest.testRestartWithTxFourNodesOneBackupsOffheapEvict(GridCacheAbstractNodeRestartSelfTest.java:452)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at junit.framework.TestCase.runTest(TestCase.java:176)
at
org.apache.ignite.testframework.junits.GridAbstractTest.runTestInternal(GridAbstractTest.java:2000)
at
org.apache.ignite.testframework.junits.GridAbstractTest.access$000(GridAbstractTest.java:132)
at
org.apache.ignite.testframework.junits.GridAbstractTest$5.run(GridAbstractTest.java:1915)
at java.lang.Thread.run(Thread.java:745)
{noformat}
> We need to cancel eviction instead of waiting it when we should own a
> partition because we had lost it
> ------------------------------------------------------------------------------------------------------
>
> Key: IGNITE-6433
> URL: https://issues.apache.org/jira/browse/IGNITE-6433
> Project: Ignite
> Issue Type: Bug
> Affects Versions: 2.1
> Reporter: Eduard Shangareev
>
> If PartitionLossPolicy.IGNORE is used and we have lost a partition that
> should belong to us according to the affinity assignment, and its state was
> RENTING, then we wait for its eviction to complete, which can hang the
> cluster (the exchange time increases significantly).
> Instead of waiting, we should simply cancel the eviction.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)