[
https://issues.apache.org/jira/browse/GEODE-8862?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17289402#comment-17289402
]
Eric Shu commented on GEODE-8862:
---------------------------------
The issue is caused by the p2p reader thread (P2P message reader for
rs-GEM-3166-PL1535a2i32xlarge-hydra-client-36(persistgemfire8_host1_8586:8586))
being blocked on a synchronized lock and therefore unable to handle the DLock
GRANT message. The DLock is needed by the thread holding the synchronized lock.
This issue occurs only with a scope of DISTRIBUTED_NO_ACK, because the thread
sending the DistributedCacheOperation does not wait for the reply from the
remote node and sends the GRANT message immediately afterwards.
{noformat}
[warn 2021/02/01 16:16:32.301 PST persistgemfire9_host1_8517 <ThreadsMonitor>
tid=0x1d] Thread 81 (0x51) is stuck
[warn 2021/02/01 16:16:32.307 PST persistgemfire9_host1_8517 <ThreadsMonitor>
tid=0x1d] Thread <81> (0x51) that was executed at <01 Feb 2021 16:15:32 PST>
has been stuck for <60.0 seconds> and number of thread monitor iteration <1>
Thread Name <P2P message reader for
rs-GEM-3166-PL1535a2i32xlarge-hydra-client-36(persistgemfire8_host1_8586:8586)<ec><v51>:41006
unshared ordered uid=1036 dom #1 local port=47207 remote port=42068> state
<BLOCKED>
Waiting on
<org.apache.geode.internal.cache.entries.VersionedThinDiskRegionEntryHeapStringKey2@1699c3cf>
Owned By <vm_10_thr_29_persist9_host1_8517> with ID <1530>
Executor Group <P2PReaderExecutor>
Monitored metric <ResourceManagerStats.numThreadsStuck>
Thread stack:
org.apache.geode.internal.cache.map.RegionMapDestroy.handleExistingRegionEntry(RegionMapDestroy.java:214)
org.apache.geode.internal.cache.map.RegionMapDestroy.destroy(RegionMapDestroy.java:152)
org.apache.geode.internal.cache.AbstractRegionMap.destroy(AbstractRegionMap.java:969)
org.apache.geode.internal.cache.LocalRegion.mapDestroy(LocalRegion.java:6505)
org.apache.geode.internal.cache.LocalRegion.mapDestroy(LocalRegion.java:6479)
org.apache.geode.internal.cache.LocalRegionDataView.destroyExistingEntry(LocalRegionDataView.java:59)
org.apache.geode.internal.cache.LocalRegion.basicDestroy(LocalRegion.java:6430)
org.apache.geode.internal.cache.DistributedRegion.basicDestroy(DistributedRegion.java:1730)
org.apache.geode.internal.cache.DestroyOperation$DestroyMessage.operateOnRegion(DestroyOperation.java:88)
org.apache.geode.internal.cache.DistributedCacheOperation$CacheOperationMessage.basicProcess(DistributedCacheOperation.java:1208)
org.apache.geode.internal.cache.DistributedCacheOperation$CacheOperationMessage.process(DistributedCacheOperation.java:1110)
org.apache.geode.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:376)
org.apache.geode.distributed.internal.DistributionMessage.schedule(DistributionMessage.java:432)
org.apache.geode.distributed.internal.ClusterDistributionManager.scheduleIncomingMessage(ClusterDistributionManager.java:2070)
org.apache.geode.distributed.internal.ClusterDistributionManager.handleIncomingDMsg(ClusterDistributionManager.java:1832)
org.apache.geode.distributed.internal.ClusterDistributionManager$$Lambda$102/1096792171.messageReceived(Unknown
Source)
org.apache.geode.distributed.internal.membership.gms.GMSMembership.dispatchMessage(GMSMembership.java:925)
org.apache.geode.distributed.internal.membership.gms.GMSMembership.handleOrDeferMessage(GMSMembership.java:856)
org.apache.geode.distributed.internal.membership.gms.GMSMembership.processMessage(GMSMembership.java:1198)
org.apache.geode.distributed.internal.DistributionImpl$MyDCReceiver.messageReceived(DistributionImpl.java:828)
org.apache.geode.distributed.internal.direct.DirectChannel.receive(DirectChannel.java:614)
org.apache.geode.internal.tcp.TCPConduit.messageReceived(TCPConduit.java:679)
org.apache.geode.internal.tcp.Connection.dispatchMessage(Connection.java:3268)
org.apache.geode.internal.tcp.Connection.readMessage(Connection.java:2993)
org.apache.geode.internal.tcp.Connection.processInputBuffer(Connection.java:2797)
org.apache.geode.internal.tcp.Connection.readMessages(Connection.java:1651)
org.apache.geode.internal.tcp.Connection.run(Connection.java:1482)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:748)
Lock owner thread stack
sun.misc.Unsafe.park(Native Method)
Lock owner thread stack
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos(AbstractQueuedSynchronizer.java:1037)
java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos(AbstractQueuedSynchronizer.java:1328)
java.util.concurrent.CountDownLatch.await(CountDownLatch.java:277)
org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:72)
org.apache.geode.distributed.internal.ReplyProcessor21.basicWait(ReplyProcessor21.java:723)
org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:794)
org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:771)
org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:857)
org.apache.geode.distributed.internal.locks.DLockRequestProcessor.requestLock(DLockRequestProcessor.java:238)
org.apache.geode.distributed.internal.locks.DLockService.lockInterruptibly(DLockService.java:1505)
org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1271)
org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1262)
org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1257)
org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1253)
org.apache.geode.pdx.internal.PeerTypeRegistration.lock(PeerTypeRegistration.java:314)
org.apache.geode.pdx.internal.PeerTypeRegistration.defineEnum(PeerTypeRegistration.java:646)
org.apache.geode.pdx.internal.PeerTypeRegistration.getEnumId(PeerTypeRegistration.java:601)
org.apache.geode.pdx.internal.TypeRegistry.getEnumId(TypeRegistry.java:363)
org.apache.geode.internal.InternalDataSerializer.writePdxEnum(InternalDataSerializer.java:2071)
org.apache.geode.internal.InternalDataSerializer.writeUserObject(InternalDataSerializer.java:1610)
org.apache.geode.internal.InternalDataSerializer.writeWellKnownObject(InternalDataSerializer.java:1517)
org.apache.geode.internal.InternalDataSerializer.basicWriteObject(InternalDataSerializer.java:2034)
org.apache.geode.pdx.internal.PdxOutputStream.writeObject(PdxOutputStream.java:72)
org.apache.geode.pdx.internal.PdxWriterImpl.writeObject(PdxWriterImpl.java:341)
org.apache.geode.pdx.internal.PdxWriterImpl.writeObject(PdxWriterImpl.java:330)
util.VersionedValueHolder.myToData(VersionedValueHolder.java:227)
util.PdxVersionedValueHolder.toData(PdxVersionedValueHolder.java:84)
org.apache.geode.internal.InternalDataSerializer.writePdx(InternalDataSerializer.java:2794)
org.apache.geode.internal.InternalDataSerializer.basicWriteObject(InternalDataSerializer.java:2011)
org.apache.geode.DataSerializer.writeObject(DataSerializer.java:2839)
org.apache.geode.internal.util.BlobHelper.serializeToBlob(BlobHelper.java:54)
org.apache.geode.internal.cache.EntryEventImpl.serialize(EntryEventImpl.java:2092)
org.apache.geode.internal.cache.EntryEventImpl.serialize(EntryEventImpl.java:2078)
org.apache.geode.internal.cache.entries.DiskEntry$Helper.createValueWrapper(DiskEntry.java:768)
org.apache.geode.internal.cache.entries.DiskEntry$Helper.basicUpdate(DiskEntry.java:955)
org.apache.geode.internal.cache.entries.DiskEntry$Helper.update(DiskEntry.java:867)
org.apache.geode.internal.cache.entries.AbstractDiskRegionEntry.setValue(AbstractDiskRegionEntry.java:40)
org.apache.geode.internal.cache.entries.AbstractRegionEntry.setValueWithTombstoneCheck(AbstractRegionEntry.java:290)
{noformat}
> DLockRecoverGrantorProcessor.recoverLockGrantor sometimes hangs
> ---------------------------------------------------------------
>
> Key: GEODE-8862
> URL: https://issues.apache.org/jira/browse/GEODE-8862
> Project: Geode
> Issue Type: Bug
> Components: distributed lock service
> Affects Versions: 1.14.0
> Reporter: Eric Shu
> Priority: Major
> Labels: blocks-1.14.0, pull-request-available
>
> "vm_9_thr_27_persist8_host1_29953" #1448 daemon prio=5 os_prio=0 cpu=45.99ms
> elapsed=698.42s tid=0x00007f8d0c052800 nid=0x2ee8 waiting on condition
> [0x00007f8c354f6000]
> java.lang.Thread.State: TIMED_WAITING (parking)
> at jdk.internal.misc.Unsafe.park([email protected]/Native Method)
> - parking to wait for <0x00000000fb7bb488> (a
> java.util.concurrent.CountDownLatch$Sync)
> at
> java.util.concurrent.locks.LockSupport.parkNanos([email protected]/LockSupport.java:234)
> at
> java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedNanos([email protected]/AbstractQueuedSynchronizer.java:1079)
> at
> java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireSharedNanos([email protected]/AbstractQueuedSynchronizer.java:1369)
> at
> java.util.concurrent.CountDownLatch.await([email protected]/CountDownLatch.java:278)
> at
> org.apache.geode.internal.util.concurrent.StoppableCountDownLatch.await(StoppableCountDownLatch.java:72)
> at
> org.apache.geode.distributed.internal.ReplyProcessor21.basicWait(ReplyProcessor21.java:723)
> at
> org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:794)
> at
> org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:771)
> at
> org.apache.geode.distributed.internal.ReplyProcessor21.waitForRepliesUninterruptibly(ReplyProcessor21.java:857)
> at
> org.apache.geode.distributed.internal.locks.DLockRecoverGrantorProcessor.recoverLockGrantor(DLockRecoverGrantorProcessor.java:100)
> at
> org.apache.geode.distributed.internal.locks.DLockService.makeLocalGrantor(DLockService.java:447)
> at
> org.apache.geode.distributed.internal.locks.DLockService.createLocalGrantor(DLockService.java:392)
> at
> org.apache.geode.distributed.internal.locks.DLockService.getLockGrantorId(DLockService.java:337)
> at
> org.apache.geode.distributed.internal.locks.DLockService.lockInterruptibly(DLockService.java:1445)
> at
> org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1241)
> at
> org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1232)
> at
> org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1227)
> at
> org.apache.geode.distributed.internal.locks.DLockService.lock(DLockService.java:1223)
> at
> org.apache.geode.pdx.internal.PeerTypeRegistration.lock(PeerTypeRegistration.java:314)
> at
> org.apache.geode.pdx.internal.PeerTypeRegistration.defineEnum(PeerTypeRegistration.java:646)
> at
> org.apache.geode.pdx.internal.PeerTypeRegistration.getEnumId(PeerTypeRegistration.java:601)
> at
> org.apache.geode.pdx.internal.TypeRegistry.getEnumId(TypeRegistry.java:363)
> at
> org.apache.geode.internal.InternalDataSerializer.writePdxEnum(InternalDataSerializer.java:2071)
> at
> org.apache.geode.internal.InternalDataSerializer.writeUserObject(InternalDataSerializer.java:1610)
> at
> org.apache.geode.internal.InternalDataSerializer.writeWellKnownObject(InternalDataSerializer.java:1517)
> at
> org.apache.geode.internal.InternalDataSerializer.basicWriteObject(InternalDataSerializer.java:2034)
> at
> org.apache.geode.pdx.internal.PdxOutputStream.writeObject(PdxOutputStream.java:72)
> at
> org.apache.geode.pdx.internal.PdxWriterImpl.writeObject(PdxWriterImpl.java:341)
> at
> org.apache.geode.pdx.internal.PdxWriterImpl.writeObject(PdxWriterImpl.java:330)
> at util.VersionedValueHolder.myToData(VersionedValueHolder.java:227)
> at
> util.PdxVersionedValueHolder.toData(PdxVersionedValueHolder.java:84)
> at
> org.apache.geode.internal.InternalDataSerializer.writePdx(InternalDataSerializer.java:2794)
> at
> org.apache.geode.internal.InternalDataSerializer.basicWriteObject(InternalDataSerializer.java:2011)
> at
> org.apache.geode.DataSerializer.writeObject(DataSerializer.java:2839)
> at
> org.apache.geode.internal.util.BlobHelper.serializeToBlob(BlobHelper.java:54)
> at
> org.apache.geode.internal.cache.EntryEventImpl.serialize(EntryEventImpl.java:2092)
> at
> org.apache.geode.internal.cache.EntryEventImpl.serialize(EntryEventImpl.java:2078)
> at
> org.apache.geode.internal.cache.entries.DiskEntry$Helper.createValueWrapper(DiskEntry.java:768)
> at
> org.apache.geode.internal.cache.entries.DiskEntry$Helper.basicUpdate(DiskEntry.java:955)
> at
> org.apache.geode.internal.cache.entries.DiskEntry$Helper.update(DiskEntry.java:867)
> - locked <0x00000000faaeb3d0> (a
> org.apache.geode.internal.cache.DiskId$PersistenceWithIntOffset)
> at
> org.apache.geode.internal.cache.entries.AbstractDiskRegionEntry.setValue(AbstractDiskRegionEntry.java:40)
> at
> org.apache.geode.internal.cache.entries.AbstractRegionEntry.setValueWithTombstoneCheck(AbstractRegionEntry.java:290)
> at
> org.apache.geode.internal.cache.EntryEventImpl.setNewValueInRegion(EntryEventImpl.java:1767)
> at
> org.apache.geode.internal.cache.EntryEventImpl.putExistingEntry(EntryEventImpl.java:1640)
> at
> org.apache.geode.internal.cache.map.RegionMapPut.updateEntry(RegionMapPut.java:485)
> at
> org.apache.geode.internal.cache.map.RegionMapPut.createOrUpdateEntry(RegionMapPut.java:256)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.doPutAndDeliverEvent(AbstractRegionMapPut.java:300)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut$$Lambda$420/0x0000000100a2b440.run(Unknown
> Source)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.runWithIndexUpdatingInProgress(AbstractRegionMapPut.java:308)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.doPutIfPreconditionsSatisified(AbstractRegionMapPut.java:296)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.doPutOnSynchronizedRegionEntry(AbstractRegionMapPut.java:282)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.doPutOnRegionEntryInMap(AbstractRegionMapPut.java:273)
> - locked <0x00000000faaeb388> (a
> org.apache.geode.internal.cache.entries.VersionedThinDiskRegionEntryHeapStringKey2)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.addRegionEntryToMapAndDoPut(AbstractRegionMapPut.java:251)
> - locked <0x00000000faaeb388> (a
> org.apache.geode.internal.cache.entries.VersionedThinDiskRegionEntryHeapStringKey2)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.doPutRetryingIfNeeded(AbstractRegionMapPut.java:216)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut$$Lambda$419/0x0000000100a2b040.run(Unknown
> Source)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.doWithIndexInUpdateMode(AbstractRegionMapPut.java:198)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.doPut(AbstractRegionMapPut.java:180)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut$$Lambda$418/0x0000000100a2ac40.run(Unknown
> Source)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.runWhileLockedForCacheModification(AbstractRegionMapPut.java:119)
> at
> org.apache.geode.internal.cache.map.RegionMapPut.runWhileLockedForCacheModification(RegionMapPut.java:161)
> at
> org.apache.geode.internal.cache.map.AbstractRegionMapPut.put(AbstractRegionMapPut.java:169)
> at
> org.apache.geode.internal.cache.AbstractRegionMap.basicPut(AbstractRegionMap.java:2036)
> at
> org.apache.geode.internal.cache.LocalRegion.virtualPut(LocalRegion.java:5600)
> at
> org.apache.geode.internal.cache.DistributedRegion.virtualPut(DistributedRegion.java:393)
> at
> org.apache.geode.internal.cache.LocalRegion.virtualPut(LocalRegion.java:5578)
> at
> org.apache.geode.internal.cache.LocalRegionDataView.putEntry(LocalRegionDataView.java:157)
> at
> org.apache.geode.internal.cache.LocalRegion.basicPut(LocalRegion.java:5036)
> at
> org.apache.geode.internal.cache.LocalRegion.validatedPut(LocalRegion.java:1635)
> at
> org.apache.geode.internal.cache.LocalRegion.put(LocalRegion.java:1622)
> at diskRecovery.RecoveryTest.updateEntry(RecoveryTest.java:3341)
> at diskRecovery.RecoveryTest.updateEntry(RecoveryTest.java:3318)
> at diskRecovery.RecoveryTest.doOperations(RecoveryTest.java:2929)
> at
> diskRecovery.RecoveryTest.concRecoverLatestResponder(RecoveryTest.java:2640)
> at
> diskRecovery.RecoveryTest.HydraTask_concRecoverLatestResponder(RecoveryTest.java:500)
> at
> jdk.internal.reflect.NativeMethodAccessorImpl.invoke0([email protected]/Native
> Method)
> at
> jdk.internal.reflect.NativeMethodAccessorImpl.invoke([email protected]/NativeMethodAccessorImpl.java:62)
> at
> jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke([email protected]/DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke([email protected]/Method.java:566)
> at hydra.MethExecutor.execute(MethExecutor.java:173)
> at hydra.MethExecutor.execute(MethExecutor.java:141)
> at hydra.TestTask.execute(TestTask.java:197)
> at hydra.RemoteTestModule$1.run(RemoteTestModule.java:213)
--
This message was sent by Atlassian Jira
(v8.3.4#803005)