Anton Kalashnikov created IGNITE-12594:
------------------------------------------
Summary: Deadlock between GridCacheDataStore#purgeExpiredInternal
and GridNearTxLocal#enlistWriteEntry
Key: IGNITE-12594
URL: https://issues.apache.org/jira/browse/IGNITE-12594
Project: Ignite
Issue Type: Bug
Reporter: Anton Kalashnikov
Assignee: Anton Kalashnikov
The deadlock reproduces occasionally in the PDS 3 suite and can be seen in the
thread dump below.
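One thread attempts to unwind evicts, acquires the checkpoint read lock and
then tries to lock a {{GridCacheMapEntry}}. Another thread calls
{{GridCacheMapEntry#unswap}}, which locks the entry, determines that the entry
is expired and, while still holding the entry lock, tries to acquire the
checkpoint read lock to remove the entry from the store. The two threads
therefore take the same pair of locks in opposite order.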
We should not acquire the checkpoint read lock while holding a
{{GridCacheMapEntry}} lock.
{code:java}Thread [name="updater-1", id=29900, state=WAITING, blockCnt=2,
waitCnt=4450]
Lock
[object=java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@2fc51685,
ownerName=null, ownerId=-1]
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283)
at
java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727)
at
o.a.i.i.processors.cache.persistence.GridCacheDatabaseSharedManager.checkpointReadLock(GridCacheDatabaseSharedManager.java:1632)
<- CP read lock
at
o.a.i.i.processors.cache.GridCacheMapEntry.onExpired(GridCacheMapEntry.java:4081)
at
o.a.i.i.processors.cache.GridCacheMapEntry.unswap(GridCacheMapEntry.java:559)
at
o.a.i.i.processors.cache.GridCacheMapEntry.unswap(GridCacheMapEntry.java:519)
<- locked entry
at
o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.enlistWriteEntry(GridNearTxLocal.java:1437)
at
o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.enlistWrite(GridNearTxLocal.java:1303)
at
o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.putAllAsync0(GridNearTxLocal.java:957)
at
o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.putAllAsync(GridNearTxLocal.java:491)
at
o.a.i.i.processors.cache.GridCacheAdapter$29.inOp(GridCacheAdapter.java:2526)
at
o.a.i.i.processors.cache.GridCacheAdapter$SyncInOp.op(GridCacheAdapter.java:4727)
at
o.a.i.i.processors.cache.GridCacheAdapter.syncOp(GridCacheAdapter.java:3740)
at
o.a.i.i.processors.cache.GridCacheAdapter.putAll0(GridCacheAdapter.java:2524)
at
o.a.i.i.processors.cache.GridCacheAdapter.putAll(GridCacheAdapter.java:2513)
at
o.a.i.i.processors.cache.IgniteCacheProxyImpl.putAll(IgniteCacheProxyImpl.java:1264)
at
o.a.i.i.processors.cache.GatewayProtectedCacheProxy.putAll(GatewayProtectedCacheProxy.java:863)
at
o.a.i.i.processors.cache.persistence.IgnitePdsContinuousRestartTest$1.call(IgnitePdsContinuousRestartTest.java:291)
at o.a.i.testframework.GridTestThread.run(GridTestThread.java:83)
Locked synchronizers:
java.util.concurrent.locks.ReentrantLock$NonfairSync@762613f7
Thread
[name="sys-stripe-0-#24086%persistence.IgnitePdsContinuousRestartTestWithExpiryPolicy0%",
id=29617, state=WAITING, blockCnt=2, waitCnt=65381]
Lock [object=java.util.concurrent.locks.ReentrantLock$NonfairSync@762613f7,
ownerName=updater-1, ownerId=29900]
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199)
at
java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209)
at
java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285)
<- lock entry
at
o.a.i.i.processors.cache.GridCacheMapEntry.lockEntry(GridCacheMapEntry.java:5017)
at
o.a.i.i.processors.cache.GridCacheMapEntry.markObsoleteVersion(GridCacheMapEntry.java:2799)
at
o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.removeVersionedEntry(GridDhtLocalPartition.java:392)
at
o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.cleanupRemoveQueue(GridDhtLocalPartition.java:416)
at
o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.onDeferredDelete(GridDhtLocalPartition.java:441)
at
o.a.i.i.processors.cache.distributed.dht.GridDhtCacheAdapter.onDeferredDelete(GridDhtCacheAdapter.java:1696)
at
o.a.i.i.processors.cache.GridCacheContext.onDeferredDelete(GridCacheContext.java:1710)
at
o.a.i.i.processors.cache.GridCacheMapEntry.onTtlExpired(GridCacheMapEntry.java:4037)
at
o.a.i.i.processors.cache.GridCacheTtlManager$1.applyx(GridCacheTtlManager.java:75)
at
o.a.i.i.processors.cache.GridCacheTtlManager$1.applyx(GridCacheTtlManager.java:66)
at o.a.i.i.util.lang.IgniteInClosure2X.apply(IgniteInClosure2X.java:37)
at
o.a.i.i.processors.cache.persistence.GridCacheOffheapManager$GridCacheDataStore.purgeExpiredInternal(GridCacheOffheapManager.java:2725)
<- CP read lock
at
o.a.i.i.processors.cache.persistence.GridCacheOffheapManager$GridCacheDataStore.purgeExpired(GridCacheOffheapManager.java:2651)
at
o.a.i.i.processors.cache.persistence.GridCacheOffheapManager.expire(GridCacheOffheapManager.java:1047)
at
o.a.i.i.processors.cache.GridCacheTtlManager.expire(GridCacheTtlManager.java:242)
at
o.a.i.i.processors.cache.GridCacheUtils.unwindEvicts(GridCacheUtils.java:874)
at
o.a.i.i.processors.cache.transactions.IgniteTxStateImpl.unwindEvicts(IgniteTxStateImpl.java:106)
at
o.a.i.i.processors.cache.GridCacheIoManager.onMessageProcessed(GridCacheIoManager.java:1182)
at
o.a.i.i.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:1161)
at
o.a.i.i.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:591)
at
o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:392)
at
o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:318)
at
o.a.i.i.processors.cache.GridCacheIoManager.access$100(GridCacheIoManager.java:109)
at
o.a.i.i.processors.cache.GridCacheIoManager$1.onMessage(GridCacheIoManager.java:308)
at
o.a.i.i.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1607)
at
o.a.i.i.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1231)
at
o.a.i.i.managers.communication.GridIoManager.access$4300(GridIoManager.java:132)
at
o.a.i.i.managers.communication.GridIoManager$8.run(GridIoManager.java:1124)
at o.a.i.i.util.StripedExecutor$Stripe.body(StripedExecutor.java:559)
at o.a.i.i.util.worker.GridWorker.run(GridWorker.java:119)
at java.lang.Thread.run(Thread.java:748){code}
Reproduced by PDS 3
[https://ggtc.gridgain.com/viewLog.html?buildId=2706284&buildTypeId=Tests_GridGainCeEeUe_Latest_CE_Pds3&tab=buildResultsDiv&branch_Tests_GridGainCeEeUe_Latest_CE=<default>|https://ggtc.gridgain.com/viewLog.html?buildId=2706284&buildTypeId=Tests_GridGainCeEeUe_Latest_CE_Pds3&tab=buildResultsDiv&branch_Tests_GridGainCeEeUe_Latest_CE=%3Cdefault%3E]
--
This message was sent by Atlassian Jira
(v8.3.4#803005)