[ 
https://issues.apache.org/jira/browse/IGNITE-6433?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16287418#comment-16287418
 ] 

Alexandr Kuramshin commented on IGNITE-6433:
--------------------------------------------

The following thread deadlock occurs frequently on node stop, causing the whole 
test suite to time out:

{noformat}
sys-#1524465%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest0%
prio=10 tid=0x00007f082002a800 nid=0x6c13 waiting on condition 
[0x00007f0793ebc000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x0000000748e6d530> (a 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:964)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1282)
        at 
java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:731)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.partitionMap(GridDhtPartitionTopologyImpl.java:1162)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.createPartitionsFullMessage(GridCachePartitionExchangeManager.java:1045)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.sendAllPartitions(GridCachePartitionExchangeManager.java:981)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.refreshPartitions(GridCachePartitionExchangeManager.java:964)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.access$2400(GridCachePartitionExchangeManager.java:131)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ResendTimeoutObject$1.run(GridCachePartitionExchangeManager.java:2506)
        at 
org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:6695)
        at 
org.apache.ignite.internal.processors.closure.GridClosureProcessor$1.body(GridClosureProcessor.java:827)
        at 
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)

sys-#1524152%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest0%
prio=10 tid=0x00007f07bc1eb800 nid=0x6abf waiting on condition 
[0x00007f08d48ed000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x0000000748e6d9f8> (a 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:867)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1197)
        at 
java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock(ReentrantReadWriteLock.java:945)
        at 
org.apache.ignite.internal.util.StripedCompositeReadWriteLock$WriteLock.lock0(StripedCompositeReadWriteLock.java:154)
        at 
org.apache.ignite.internal.util.StripedCompositeReadWriteLock$WriteLock.lock(StripedCompositeReadWriteLock.java:123)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.onEvicted(GridDhtPartitionTopologyImpl.java:2253)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader.onPartitionEvicted(GridDhtPreloader.java:461)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition.finishDestroy(GridDhtLocalPartition.java:731)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition.clearEvicting(GridDhtLocalPartition.java:699)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition.tryEvict(GridDhtLocalPartition.java:759)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader$3.call(GridDhtPreloader.java:593)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader$3.call(GridDhtPreloader.java:580)
        at 
org.apache.ignite.internal.util.IgniteUtils.wrapThreadLoader(IgniteUtils.java:6639)
        at 
org.apache.ignite.internal.processors.closure.GridClosureProcessor$2.body(GridClosureProcessor.java:967)
        at 
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)

exchange-worker-#1523966%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest0%
prio=10 tid=0x00007f08bd16a800 nid=0x69f7 waiting on condition 
[0x00007f0ac4afb000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:315)
        at 
org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:177)
        at 
org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:140)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.detectLostPartitions(GridDhtPartitionTopologyImpl.java:1839)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.detectLostPartitions(GridDhtPartitionsExchangeFuture.java:2146)
        - locked <0x0000000747e37b58> (a java.lang.Object)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.finishExchangeOnCoordinator(GridDhtPartitionsExchangeFuture.java:2321)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.onAllReceived(GridDhtPartitionsExchangeFuture.java:2208)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.distributedExchange(GridDhtPartitionsExchangeFuture.java:1031)
        at 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:651)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2279)
        at 
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
        at java.lang.Thread.run(Thread.java:745)

test-runner-#1523889%dht.GridCachePartitionedNearDisabledOptimisticTxNodeRestartTest%
prio=10 tid=0x00007f0ad9efa000 nid=0x69a6 waiting for monitor entry 
[0x00007f071e903000]
   java.lang.Thread.State: BLOCKED (on object monitor)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.cancel(GridCachePartitionExchangeManager.java:2115)
        - waiting to lock <0x0000000747e37b58> (a java.lang.Object)
        at 
org.apache.ignite.internal.util.IgniteUtils.cancel(IgniteUtils.java:4672)
        at 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.onKernalStop0(GridCachePartitionExchangeManager.java:668)
        at 
org.apache.ignite.internal.processors.cache.GridCacheSharedManagerAdapter.onKernalStop(GridCacheSharedManagerAdapter.java:120)
        at 
org.apache.ignite.internal.processors.cache.GridCacheProcessor.onKernalStop(GridCacheProcessor.java:913)
        at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2234)
        at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:2182)
        at 
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2511)
        - locked <0x0000000748b04c80> (a 
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance)
        at 
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2474)
        at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:361)
        at org.apache.ignite.Ignition.stop(Ignition.java:224)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid(GridAbstractTest.java:1025)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1068)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1046)
        at 
org.apache.ignite.internal.processors.cache.distributed.GridCacheAbstractNodeRestartSelfTest.checkRestartWithTx(GridCacheAbstractNodeRestartSelfTest.java:854)
        at 
org.apache.ignite.internal.processors.cache.distributed.GridCacheAbstractNodeRestartSelfTest.testRestartWithTxFourNodesOneBackupsOffheapEvict(GridCacheAbstractNodeRestartSelfTest.java:452)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at junit.framework.TestCase.runTest(TestCase.java:176)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest.runTestInternal(GridAbstractTest.java:2000)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest.access$000(GridAbstractTest.java:132)
        at 
org.apache.ignite.testframework.junits.GridAbstractTest$5.run(GridAbstractTest.java:1915)
        at java.lang.Thread.run(Thread.java:745)

{noformat}


> We need to cancel eviction instead of waiting it when we should own a 
> partition because we had lost it
> ------------------------------------------------------------------------------------------------------
>
>                 Key: IGNITE-6433
>                 URL: https://issues.apache.org/jira/browse/IGNITE-6433
>             Project: Ignite
>          Issue Type: Bug
>    Affects Versions: 2.1
>            Reporter: Eduard Shangareev
>
> If PartitionLossPolicy.IGNORE is used and we have lost a partition that 
> should belong to us according to the affinity assignment, and its state was 
> RENTING, then we wait for its eviction to complete, which can hang the cluster 
> (the exchange time increases significantly).
> Instead of waiting, we should simply cancel the eviction.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to