[ 
https://issues.apache.org/jira/browse/ARTEMIS-3054?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Clebert Suconic updated ARTEMIS-3054:
-------------------------------------
    Description: 
There are inconsistencies between replication catch-up and start/stop paging:

 
 * A deadlock (2).
 * When the getIDs for replication is called (the same place where the deadlock 
would occur), a next page is called, which could affect the ability to clean up.
 * There's a waitCompletion on paging that could be removed (1).

(1):
{code:java}
protected void storeBookmark(ArrayList<PageSubscription> cursorList, Page 
currentPage) throws Exception { 
...

if (!storageManager.waitOnOperations(5000)) {
 
ActiveMQServerLogger.LOGGER.problemCompletingOperations(storageManager.getContext());
}

...{code}
(2):
{code:java}

Deadlock detected!
"Thread-3726" Id=196867 WAITING on 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@5bf57f48 owned by 
"Thread-2 
(ActiveMQ-IO-server-org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl$7@74a88964)"
 Id=196843
 at sun.misc.Unsafe.park(Native Method)

waiting on 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@5bf57f48
 at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283)
 at 
java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727)
 at 
org.apache.activemq.artemis.core.paging.impl.PagingStoreImpl.getCurrentIds(PagingStoreImpl.java:1165)
 at 
org.apache.activemq.artemis.core.persistence.impl.journal.JournalStorageManager.getPageInformationForSync(JournalStorageManager.java:738)
 at 
org.apache.activemq.artemis.core.persistence.impl.journal.JournalStorageManager.startReplication(JournalStorageManager.java:666)
 at 
org.apache.activemq.artemis.core.server.impl.SharedNothingLiveActivation$2.run(SharedNothingLiveActivation.java:180)
 at java.lang.Thread.run(Thread.java:748)
Number of locked synchronizers = 6

java.util.concurrent.locks.ReentrantReadWriteLock$FairSync@6611a79d 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@57b2d630 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@379c0bb5 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@26e7261e 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@4d214010 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@c630cd2
"Thread-2 
(ActiveMQ-IO-server-org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl$7@74a88964)"
 Id=196843 WAITING on 
java.util.concurrent.locks.ReentrantReadWriteLock$FairSync@6611a79d owned by 
"Thread-3726" Id=196867
 at sun.misc.Unsafe.park(Native Method)

waiting on java.util.concurrent.locks.ReentrantReadWriteLock$FairSync@6611a79d
 at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283)
 at 
java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727)
 at 
org.apache.activemq.artemis.core.persistence.impl.journal.AbstractJournalStorageManager.readLock(AbstractJournalStorageManager.java:400)
 at 
org.apache.activemq.artemis.core.paging.cursor.impl.PageSubscriptionCounterImpl.delete(PageSubscriptionCounterImpl.java:257)
 at 
org.apache.activemq.artemis.core.paging.cursor.impl.PageSubscriptionImpl.onPageModeCleared(PageSubscriptionImpl.java:273)
 at 
org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl.onPageModeCleared(PageCursorProviderImpl.java:400)
 at 
org.apache.activemq.artemis.core.paging.impl.PagingStoreImpl.stopPaging(PagingStoreImpl.java:505)
 at 
org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl.cleanup(PageCursorProviderImpl.java:486)
 locked 
org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl@5048ce77
 at 
org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl$1.run(PageCursorProviderImpl.java:377)
 at 
org.apache.activemq.artemis.utils.actors.OrderedExecutor.doTask(OrderedExecutor.java:42)
 at 
org.apache.activemq.artemis.utils.actors.OrderedExecutor.doTask(OrderedExecutor.java:31)
 at 
org.apache.activemq.artemis.utils.actors.ProcessorBase.executePendingTasks(ProcessorBase.java:65)
 at 
org.apache.activemq.artemis.utils.actors.ProcessorBase$$Lambda$10/1881901842.run(Unknown
 Source)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at 
org.apache.activemq.artemis.utils.ActiveMQThreadFactory$1.run(ActiveMQThreadFactory.java:118)
Number of locked synchronizers = 2

java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@5bf57f48 
java.util.concurrent.ThreadPoolExecutor$Worker@588703a3 {code}

  was:
there are inconsistencies between replicate catch up and start/stop Paging:

 
 * a Deadlock (2):
 * When the getIDs for replication is called (same place where the deadlock 
would occur), a next page is called what could affect the ability to cleanup
 * There's a waitCompletion on paging that could be removed(1).

(1):
{code:java}
protected void storeBookmark(ArrayList<PageSubscription> cursorList, Page 
currentPage) throws Exception { try { // First 
......
if (!storageManager.waitOnOperations(5000)) {
 
ActiveMQServerLogger.LOGGER.problemCompletingOperations(storageManager.getContext());
}

...{code}
{code:java}
Deadlock detected!
"Thread-3726" Id=196867 WAITING on 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@5bf57f48 owned by 
"Thread-2 
(ActiveMQ-IO-server-org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl$7@74a88964)"
 Id=196843
 at sun.misc.Unsafe.park(Native Method)

waiting on 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@5bf57f48
 at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283)
 at 
java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727)
 at 
org.apache.activemq.artemis.core.paging.impl.PagingStoreImpl.getCurrentIds(PagingStoreImpl.java:1165)
 at 
org.apache.activemq.artemis.core.persistence.impl.journal.JournalStorageManager.getPageInformationForSync(JournalStorageManager.java:738)
 at 
org.apache.activemq.artemis.core.persistence.impl.journal.JournalStorageManager.startReplication(JournalStorageManager.java:666)
 at 
org.apache.activemq.artemis.core.server.impl.SharedNothingLiveActivation$2.run(SharedNothingLiveActivation.java:180)
 at java.lang.Thread.run(Thread.java:748)
Number of locked synchronizers = 6

java.util.concurrent.locks.ReentrantReadWriteLock$FairSync@6611a79d 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@57b2d630 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@379c0bb5 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@26e7261e 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@4d214010 
java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@c630cd2
"Thread-2 
(ActiveMQ-IO-server-org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl$7@74a88964)"
 Id=196843 WAITING on 
java.util.concurrent.locks.ReentrantReadWriteLock$FairSync@6611a79d owned by 
"Thread-3726" Id=196867
 at sun.misc.Unsafe.park(Native Method)

waiting on java.util.concurrent.locks.ReentrantReadWriteLock$FairSync@6611a79d
 at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967)
 at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283)
 at 
java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727)
 at 
org.apache.activemq.artemis.core.persistence.impl.journal.AbstractJournalStorageManager.readLock(AbstractJournalStorageManager.java:400)
 at 
org.apache.activemq.artemis.core.paging.cursor.impl.PageSubscriptionCounterImpl.delete(PageSubscriptionCounterImpl.java:257)
 at 
org.apache.activemq.artemis.core.paging.cursor.impl.PageSubscriptionImpl.onPageModeCleared(PageSubscriptionImpl.java:273)
 at 
org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl.onPageModeCleared(PageCursorProviderImpl.java:400)
 at 
org.apache.activemq.artemis.core.paging.impl.PagingStoreImpl.stopPaging(PagingStoreImpl.java:505)
 at 
org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl.cleanup(PageCursorProviderImpl.java:486)
 locked 
org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl@5048ce77
 at 
org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl$1.run(PageCursorProviderImpl.java:377)
 at 
org.apache.activemq.artemis.utils.actors.OrderedExecutor.doTask(OrderedExecutor.java:42)
 at 
org.apache.activemq.artemis.utils.actors.OrderedExecutor.doTask(OrderedExecutor.java:31)
 at 
org.apache.activemq.artemis.utils.actors.ProcessorBase.executePendingTasks(ProcessorBase.java:65)
 at 
org.apache.activemq.artemis.utils.actors.ProcessorBase$$Lambda$10/1881901842.run(Unknown
 Source)
 at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
 at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
 at 
org.apache.activemq.artemis.utils.ActiveMQThreadFactory$1.run(ActiveMQThreadFactory.java:118)
Number of locked synchronizers = 2

java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@5bf57f48 
java.util.concurrent.ThreadPoolExecutor$Worker@588703a3 {code}


> Clashes between start/stop paging and replication catch up
> ----------------------------------------------------------
>
>                 Key: ARTEMIS-3054
>                 URL: https://issues.apache.org/jira/browse/ARTEMIS-3054
>             Project: ActiveMQ Artemis
>          Issue Type: Improvement
>          Components: Broker
>    Affects Versions: 2.16.0
>            Reporter: Clebert Suconic
>            Priority: Major
>             Fix For: 2.17.0
>
>
> There are inconsistencies between replication catch-up and start/stop paging:
>  
>  * A deadlock (2).
>  * When the getIDs for replication is called (the same place where the deadlock 
> would occur), a next page is called, which could affect the ability to clean up.
>  * There's a waitCompletion on paging that could be removed (1).
> (1):
> {code:java}
> protected void storeBookmark(ArrayList<PageSubscription> cursorList, Page 
> currentPage) throws Exception { 
> ...
> if (!storageManager.waitOnOperations(5000)) {
>  
> ActiveMQServerLogger.LOGGER.problemCompletingOperations(storageManager.getContext());
> }
> ...{code}
> (2):
> {code:java}
> Deadlock detected!
> "Thread-3726" Id=196867 WAITING on 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@5bf57f48 owned 
> by "Thread-2 
> (ActiveMQ-IO-server-org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl$7@74a88964)"
>  Id=196843
>  at sun.misc.Unsafe.park(Native Method)
> waiting on 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@5bf57f48
>  at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
>  at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
>  at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967)
>  at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283)
>  at 
> java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727)
>  at 
> org.apache.activemq.artemis.core.paging.impl.PagingStoreImpl.getCurrentIds(PagingStoreImpl.java:1165)
>  at 
> org.apache.activemq.artemis.core.persistence.impl.journal.JournalStorageManager.getPageInformationForSync(JournalStorageManager.java:738)
>  at 
> org.apache.activemq.artemis.core.persistence.impl.journal.JournalStorageManager.startReplication(JournalStorageManager.java:666)
>  at 
> org.apache.activemq.artemis.core.server.impl.SharedNothingLiveActivation$2.run(SharedNothingLiveActivation.java:180)
>  at java.lang.Thread.run(Thread.java:748)
> Number of locked synchronizers = 6
> java.util.concurrent.locks.ReentrantReadWriteLock$FairSync@6611a79d 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@57b2d630 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@379c0bb5 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@26e7261e 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@4d214010 
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@c630cd2
> "Thread-2 
> (ActiveMQ-IO-server-org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl$7@74a88964)"
>  Id=196843 WAITING on 
> java.util.concurrent.locks.ReentrantReadWriteLock$FairSync@6611a79d owned by 
> "Thread-3726" Id=196867
>  at sun.misc.Unsafe.park(Native Method)
> waiting on java.util.concurrent.locks.ReentrantReadWriteLock$FairSync@6611a79d
>  at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
>  at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
>  at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967)
>  at 
> java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283)
>  at 
> java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727)
>  at 
> org.apache.activemq.artemis.core.persistence.impl.journal.AbstractJournalStorageManager.readLock(AbstractJournalStorageManager.java:400)
>  at 
> org.apache.activemq.artemis.core.paging.cursor.impl.PageSubscriptionCounterImpl.delete(PageSubscriptionCounterImpl.java:257)
>  at 
> org.apache.activemq.artemis.core.paging.cursor.impl.PageSubscriptionImpl.onPageModeCleared(PageSubscriptionImpl.java:273)
>  at 
> org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl.onPageModeCleared(PageCursorProviderImpl.java:400)
>  at 
> org.apache.activemq.artemis.core.paging.impl.PagingStoreImpl.stopPaging(PagingStoreImpl.java:505)
>  at 
> org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl.cleanup(PageCursorProviderImpl.java:486)
>  locked 
> org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl@5048ce77
>  at 
> org.apache.activemq.artemis.core.paging.cursor.impl.PageCursorProviderImpl$1.run(PageCursorProviderImpl.java:377)
>  at 
> org.apache.activemq.artemis.utils.actors.OrderedExecutor.doTask(OrderedExecutor.java:42)
>  at 
> org.apache.activemq.artemis.utils.actors.OrderedExecutor.doTask(OrderedExecutor.java:31)
>  at 
> org.apache.activemq.artemis.utils.actors.ProcessorBase.executePendingTasks(ProcessorBase.java:65)
>  at 
> org.apache.activemq.artemis.utils.actors.ProcessorBase$$Lambda$10/1881901842.run(Unknown
>  Source)
>  at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  at 
> org.apache.activemq.artemis.utils.ActiveMQThreadFactory$1.run(ActiveMQThreadFactory.java:118)
> Number of locked synchronizers = 2
> java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@5bf57f48 
> java.util.concurrent.ThreadPoolExecutor$Worker@588703a3 {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to